/* armv8-32-sha512-asm
 *
 * Copyright (C) 2006-2020 wolfSSL Inc.
 *
 * This file is part of wolfSSL.
 *
 * wolfSSL is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * wolfSSL is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1335, USA
 */

/* Generated using (from wolfssl):
 *   cd ../scripts
 *   ruby ./sha2/sha512.rb arm32 ../wolfssl/wolfcrypt/src/port/arm/armv8-32-sha512-asm.c
 */
#ifdef WOLFSSL_ARMASM
#ifndef __aarch64__
#include <stdint.h>
#ifdef HAVE_CONFIG_H
    #include <config.h>
#endif /* HAVE_CONFIG_H */
#include <wolfssl/wolfcrypt/settings.h>
#include <wolfssl/wolfcrypt/sha512.h>

#ifdef WOLFSSL_ARMASM_NO_NEON
static const uint64_t L_SHA512_transform_len_k[] = {
    0x428a2f98d728ae22UL,
    0x7137449123ef65cdUL,
    0xb5c0fbcfec4d3b2fUL,
    0xe9b5dba58189dbbcUL,
    0x3956c25bf348b538UL,
    0x59f111f1b605d019UL,
    0x923f82a4af194f9bUL,
    0xab1c5ed5da6d8118UL,
    0xd807aa98a3030242UL,
    0x12835b0145706fbeUL,
    0x243185be4ee4b28cUL,
    0x550c7dc3d5ffb4e2UL,
    0x72be5d74f27b896fUL,
    0x80deb1fe3b1696b1UL,
    0x9bdc06a725c71235UL,
    0xc19bf174cf692694UL,
    0xe49b69c19ef14ad2UL,
    0xefbe4786384f25e3UL,
    0xfc19dc68b8cd5b5UL,
    0x240ca1cc77ac9c65UL,
    0x2de92c6f592b0275UL,
    0x4a7484aa6ea6e483UL,
    0x5cb0a9dcbd41fbd4UL,
    0x76f988da831153b5UL,
    0x983e5152ee66dfabUL,
    0xa831c66d2db43210UL,
    0xb00327c898fb213fUL,
    0xbf597fc7beef0ee4UL,
    0xc6e00bf33da88fc2UL,
    0xd5a79147930aa725UL,
    0x6ca6351e003826fUL,
    0x142929670a0e6e70UL,
    0x27b70a8546d22ffcUL,
    0x2e1b21385c26c926UL,
    0x4d2c6dfc5ac42aedUL,
    0x53380d139d95b3dfUL,
    0x650a73548baf63deUL,
    0x766a0abb3c77b2a8UL,
    0x81c2c92e47edaee6UL,
    0x92722c851482353bUL,
    0xa2bfe8a14cf10364UL,
    0xa81a664bbc423001UL,
    0xc24b8b70d0f89791UL,
    0xc76c51a30654be30UL,
    0xd192e819d6ef5218UL,
    0xd69906245565a910UL,
    0xf40e35855771202aUL,
    0x106aa07032bbd1b8UL,
    0x19a4c116b8d2d0c8UL,
    0x1e376c085141ab53UL,
    0x2748774cdf8eeb99UL,
    0x34b0bcb5e19b48a8UL,
    0x391c0cb3c5c95a63UL,
    0x4ed8aa4ae3418acbUL,
    0x5b9cca4f7763e373UL,
    0x682e6ff3d6b2b8a3UL,
    0x748f82ee5defb2fcUL,
    0x78a5636f43172f60UL,
    0x84c87814a1f0ab72UL,
    0x8cc702081a6439ecUL,
    0x90befffa23631e28UL,
    0xa4506cebde82bde9UL,
    0xbef9a3f7b2c67915UL,
    0xc67178f2e372532bUL,
    0xca273eceea26619cUL,
    0xd186b8c721c0c207UL,
    0xeada7dd6cde0eb1eUL,
    0xf57d4f7fee6ed178UL,
    0x6f067aa72176fbaUL,
    0xa637dc5a2c898a6UL,
    0x113f9804bef90daeUL,
    0x1b710b35131c471bUL,
    0x28db77f523047d84UL,
    0x32caab7b40c72493UL,
    0x3c9ebe0a15c9bebcUL,
    0x431d67c49c100d4cUL,
    0x4cc5d4becb3e42b6UL,
    0x597f299cfc657e2aUL,
    0x5fcb6fab3ad6faecUL,
    0x6c44198c4a475817UL,
};

void Transform_Sha512_Len(wc_Sha512* sha512, const byte* data, word32 len)
{
    __asm__ __volatile__ (
        "sub	sp, sp, #0xc0\n\t"
        "mov	r3, %[L_SHA512_transform_len_k]\n\t"
        /* Copy digest to add in at end */
        "ldrd	r12, lr, [%[sha512]]\n\t"
        "ldrd	r4, r5, [%[sha512], #8]\n\t"
        "ldrd	r6, r7, [%[sha512], #16]\n\t"
        "ldrd	r8, r9, [%[sha512], #24]\n\t"
        "strd	r12, lr, [sp, #128]\n\t"
        "strd	r4, r5, [sp, #136]\n\t"
        "strd	r6, r7, [sp, #144]\n\t"
        "strd	r8, r9, [sp, #152]\n\t"
        "ldrd	r12, lr, [%[sha512], #32]\n\t"
        "ldrd	r4, r5, [%[sha512], #40]\n\t"
        "ldrd	r6, r7, [%[sha512], #48]\n\t"
        "ldrd	r8, r9, [%[sha512], #56]\n\t"
        "strd	r12, lr, [sp, #160]\n\t"
        "strd	r4, r5, [sp, #168]\n\t"
        "strd	r6, r7, [sp, #176]\n\t"
        "strd	r8, r9, [sp, #184]\n\t"
        /* Start of loop processing a block */
        "\n"
    "L_sha512_len_neon_begin_%=: \n\t"
        /* Load, Reverse and Store W */
        "ldrd	r12, lr, [%[data]]\n\t"
        "ldrd	r4, r5, [%[data], #8]\n\t"
        "ldrd	r6, r7, [%[data], #16]\n\t"
        "ldrd	r8, r9, [%[data], #24]\n\t"
        "rev	r12, r12\n\t"
        "rev	lr, lr\n\t"
        "rev	r4, r4\n\t"
        "rev	r5, r5\n\t"
        "rev	r6, r6\n\t"
        "rev	r7, r7\n\t"
        "rev	r8, r8\n\t"
        "rev	r9, r9\n\t"
        "str	lr, [sp]\n\t"
        "str	r12, [sp, #4]\n\t"
        "str	r5, [sp, #8]\n\t"
        "str	r4, [sp, #12]\n\t"
        "str	r7, [sp, #16]\n\t"
        "str	r6, [sp, #20]\n\t"
        "str	r9, [sp, #24]\n\t"
        "str	r8, [sp, #28]\n\t"
        "ldrd	r12, lr, [%[data], #32]\n\t"
        "ldrd	r4, r5, [%[data], #40]\n\t"
        "ldrd	r6, r7, [%[data], #48]\n\t"
        "ldrd	r8, r9, [%[data], #56]\n\t"
        "rev	r12, r12\n\t"
        "rev	lr, lr\n\t"
        "rev	r4, r4\n\t"
        "rev	r5, r5\n\t"
        "rev	r6, r6\n\t"
        "rev	r7, r7\n\t"
        "rev	r8, r8\n\t"
        "rev	r9, r9\n\t"
        "str	lr, [sp, #32]\n\t"
        "str	r12, [sp, #36]\n\t"
        "str	r5, [sp, #40]\n\t"
        "str	r4, [sp, #44]\n\t"
        "str	r7, [sp, #48]\n\t"
        "str	r6, [sp, #52]\n\t"
        "str	r9, [sp, #56]\n\t"
        "str	r8, [sp, #60]\n\t"
        "ldrd	r12, lr, [%[data], #64]\n\t"
        "ldrd	r4, r5, [%[data], #72]\n\t"
        "ldrd	r6, r7, [%[data], #80]\n\t"
        "ldrd	r8, r9, [%[data], #88]\n\t"
        "rev	r12, r12\n\t"
        "rev	lr, lr\n\t"
        "rev	r4, r4\n\t"
        "rev	r5, r5\n\t"
        "rev	r6, r6\n\t"
        "rev	r7, r7\n\t"
        "rev	r8, r8\n\t"
        "rev	r9, r9\n\t"
        "str	lr, [sp, #64]\n\t"
        "str	r12, [sp, #68]\n\t"
        "str	r5, [sp, #72]\n\t"
        "str	r4, [sp, #76]\n\t"
        "str	r7, [sp, #80]\n\t"
        "str	r6, [sp, #84]\n\t"
        "str	r9, [sp, #88]\n\t"
        "str	r8, [sp, #92]\n\t"
        "ldrd	r12, lr, [%[data], #96]\n\t"
        "ldrd	r4, r5, [%[data], #104]\n\t"
        "ldrd	r6, r7, [%[data], #112]\n\t"
        "ldrd	r8, r9, [%[data], #120]\n\t"
        "rev	r12, r12\n\t"
        "rev	lr, lr\n\t"
        "rev	r4, r4\n\t"
        "rev	r5, r5\n\t"
        "rev	r6, r6\n\t"
        "rev	r7, r7\n\t"
        "rev	r8, r8\n\t"
        "rev	r9, r9\n\t"
        "str	lr, [sp, #96]\n\t"
        "str	r12, [sp, #100]\n\t"
        "str	r5, [sp, #104]\n\t"
        "str	r4, [sp, #108]\n\t"
        "str	r7, [sp, #112]\n\t"
        "str	r6, [sp, #116]\n\t"
        "str	r9, [sp, #120]\n\t"
        "str	r8, [sp, #124]\n\t"
        /* Pre-calc: b ^ c */
        "ldrd	r8, r9, [%[sha512], #8]\n\t"
        "ldrd	r12, lr, [%[sha512], #16]\n\t"
        "eor	r8, r8, r12\n\t"
        "eor	r9, r9, lr\n\t"
        "mov	r10, #4\n\t"
        /* Start of 16 rounds */
        "\n"
    "L_sha512_len_neon_start_%=: \n\t"
        /* Round 0 */
        "ldrd	r12, lr, [%[sha512], #32]\n\t"
        "lsrs	r4, r12, #14\n\t"
        "lsrs	r5, lr, #14\n\t"
        "orr	r5, r5, r12, lsl 18\n\t"
        "orr	r4, r4, lr, lsl 18\n\t"
        "lsrs	r6, r12, #18\n\t"
        "lsrs	r7, lr, #18\n\t"
        "orr	r7, r7, r12, lsl 14\n\t"
        "orr	r6, r6, lr, lsl 14\n\t"
        "eor	r4, r4, r6\n\t"
        "eor	r5, r5, r7\n\t"
        "lsls	r6, r12, #23\n\t"
        "lsls	r7, lr, #23\n\t"
        "orr	r7, r7, r12, lsr 9\n\t"
        "orr	r6, r6, lr, lsr 9\n\t"
        "ldrd	r12, lr, [%[sha512], #56]\n\t"
        "eor	r4, r4, r6\n\t"
        "eor	r5, r5, r7\n\t"
        "adds	r12, r12, r4\n\t"
        "adc	lr, lr, r5\n\t"
        "strd	r12, lr, [%[sha512], #56]\n\t"
        "ldrd	r12, lr, [%[sha512], #32]\n\t"
        "ldrd	r4, r5, [%[sha512], #40]\n\t"
        "ldrd	r6, r7, [%[sha512], #48]\n\t"
        "eor	r4, r4, r6\n\t"
        "eor	r5, r5, r7\n\t"
        "and	r4, r4, r12\n\t"
        "and	r5, r5, lr\n\t"
        "eor	r4, r4, r6\n\t"
        "eor	r5, r5, r7\n\t"
        "ldrd	r12, lr, [%[sha512], #56]\n\t"
        "ldrd	r6, r7, [sp]\n\t"
        "adds	r12, r12, r4\n\t"
        "adc	lr, lr, r5\n\t"
        "ldrd	r4, r5, [r3]\n\t"
        "adds	r12, r12, r6\n\t"
        "adc	lr, lr, r7\n\t"
        "ldrd	r6, r7, [%[sha512], #24]\n\t"
        "adds	r12, r12, r4\n\t"
        "adc	lr, lr, r5\n\t"
        "strd	r12, lr, [%[sha512], #56]\n\t"
        "adds	r6, r6, r12\n\t"
        "adc	r7, r7, lr\n\t"
        "ldrd	r12, lr, [%[sha512]]\n\t"
        "strd	r6, r7, [%[sha512], #24]\n\t"
        "lsrs	r4, r12, #28\n\t"
        "lsrs	r5, lr, #28\n\t"
        "orr	r5, r5, r12, lsl 4\n\t"
        "orr	r4, r4, lr, lsl 4\n\t"
        "lsls	r6, r12, #30\n\t"
        "lsls	r7, lr, #30\n\t"
        "orr	r7, r7, r12, lsr 2\n\t"
        "orr	r6, r6, lr, lsr 2\n\t"
        "eor	r4, r4, r6\n\t"
        "eor	r5, r5, r7\n\t"
        "lsls	r6, r12, #25\n\t"
        "lsls	r7, lr, #25\n\t"
        "orr	r7, r7, r12, lsr 7\n\t"
        "orr	r6, r6, lr, lsr 7\n\t"
        "ldrd	r12, lr, [%[sha512], #56]\n\t"
        "eor	r4, r4, r6\n\t"
        "eor	r5, r5, r7\n\t"
        "adds	r12, r12, r4\n\t"
        "adc	lr, lr, r5\n\t"
        "ldrd	r6, r7, [%[sha512]]\n\t"
        "ldrd	r4, r5, [%[sha512], #8]\n\t"
        "strd	r12, lr, [%[sha512], #56]\n\t"
        "eor	r6, r6, r4\n\t"
        "eor	r7, r7, r5\n\t"
        "and	r8, r8, r6\n\t"
        "and	r9, r9, r7\n\t"
        "eor	r8, r8, r4\n\t"
        "eor	r9, r9, r5\n\t"
        "ldrd	r4, r5, [%[sha512], #56]\n\t"
        "adds	r4, r4, r8\n\t"
        "adc	r5, r5, r9\n\t"
        "strd	r4, r5, [%[sha512], #56]\n\t"
        "mov	r8, r6\n\t"
        "mov	r9, r7\n\t"
        /* Calc new W[0] */
        "ldrd	r12, lr, [sp, #112]\n\t"
        "lsrs	r4, r12, #19\n\t"
        "lsrs	r5, lr, #19\n\t"
        "orr	r5, r5, r12, lsl 13\n\t"
        "orr	r4, r4, lr, lsl 13\n\t"
        "lsls	r6, r12, #3\n\t"
        "lsls	r7, lr, #3\n\t"
        "orr	r7, r7, r12, lsr 29\n\t"
        "orr	r6, r6, lr, lsr 29\n\t"
        "eor	r5, r5, r7\n\t"
        "eor	r4, r4, r6\n\t"
        "lsrs	r6, r12, #6\n\t"
        "lsrs	r7, lr, #6\n\t"
        "orr	r6, r6, lr, lsl 26\n\t"
        "eor	r5, r5, r7\n\t"
        "eor	r4, r4, r6\n\t"
        "ldrd	r12, lr, [sp]\n\t"
        "ldrd	r6, r7, [sp, #72]\n\t"
        "adds	r12, r12, r4\n\t"
        "adc	lr, lr, r5\n\t"
        "adds	r12, r12, r6\n\t"
        "adc	lr, lr, r7\n\t"
        "strd	r12, lr, [sp]\n\t"
        "ldrd	r12, lr, [sp, #8]\n\t"
        "lsrs	r4, r12, #1\n\t"
        "lsrs	r5, lr, #1\n\t"
        "orr	r5, r5, r12, lsl 31\n\t"
        "orr	r4, r4, lr, lsl 31\n\t"
        "lsrs	r6, r12, #8\n\t"
        "lsrs	r7, lr, #8\n\t"
        "orr	r7, r7, r12, lsl 24\n\t"
        "orr	r6, r6, lr, lsl 24\n\t"
        "eor	r5, r5, r7\n\t"
        "eor	r4, r4, r6\n\t"
        "lsrs	r6, r12, #7\n\t"
        "lsrs	r7, lr, #7\n\t"
        "orr	r6, r6, lr, lsl 25\n\t"
        "eor	r5, r5, r7\n\t"
        "eor	r4, r4, r6\n\t"
        "ldrd	r12, lr, [sp]\n\t"
        "adds	r12, r12, r4\n\t"
        "adc	lr, lr, r5\n\t"
        "strd	r12, lr, [sp]\n\t"
        /* Round 1 */
        "ldrd	r12, lr, [%[sha512], #24]\n\t"
        "lsrs	r4, r12, #14\n\t"
        "lsrs	r5, lr, #14\n\t"
        "orr	r5, r5, r12, lsl 18\n\t"
        "orr	r4, r4, lr, lsl 18\n\t"
        "lsrs	r6, r12, #18\n\t"
        "lsrs	r7, lr, #18\n\t"
        "orr	r7, r7, r12, lsl 14\n\t"
        "orr	r6, r6, lr, lsl 14\n\t"
        "eor	r4, r4, r6\n\t"
        "eor	r5, r5, r7\n\t"
        "lsls	r6, r12, #23\n\t"
        "lsls	r7, lr, #23\n\t"
        "orr	r7, r7, r12, lsr 9\n\t"
        "orr	r6, r6, lr, lsr 9\n\t"
        "ldrd	r12, lr, [%[sha512], #48]\n\t"
        "eor	r4, r4, r6\n\t"
        "eor	r5, r5, r7\n\t"
        "adds	r12, r12, r4\n\t"
        "adc	lr, lr, r5\n\t"
        "strd	r12, lr, [%[sha512], #48]\n\t"
        "ldrd	r12, lr, [%[sha512], #24]\n\t"
        "ldrd	r4, r5, [%[sha512], #32]\n\t"
        "ldrd	r6, r7, [%[sha512], #40]\n\t"
        "eor	r4, r4, r6\n\t"
        "eor	r5, r5, r7\n\t"
        "and	r4, r4, r12\n\t"
        "and	r5, r5, lr\n\t"
        "eor	r4, r4, r6\n\t"
        "eor	r5, r5, r7\n\t"
        "ldrd	r12, lr, [%[sha512], #48]\n\t"
        "ldrd	r6, r7, [sp, #8]\n\t"
        "adds	r12, r12, r4\n\t"
        "adc	lr, lr, r5\n\t"
        "ldrd	r4, r5, [r3, #8]\n\t"
        "adds	r12, r12, r6\n\t"
        "adc	lr, lr, r7\n\t"
        "ldrd	r6, r7, [%[sha512], #16]\n\t"
        "adds	r12, r12, r4\n\t"
        "adc	lr, lr, r5\n\t"
        "strd	r12, lr, [%[sha512], #48]\n\t"
        "adds	r6, r6, r12\n\t"
        "adc	r7, r7, lr\n\t"
        "ldrd	r12, lr, [%[sha512], #56]\n\t"
        "strd	r6, r7, [%[sha512], #16]\n\t"
        "lsrs	r4, r12, #28\n\t"
        "lsrs	r5, lr, #28\n\t"
        "orr	r5, r5, r12, lsl 4\n\t"
        "orr	r4, r4, lr, lsl 4\n\t"
        "lsls	r6, r12, #30\n\t"
        "lsls	r7, lr, #30\n\t"
        "orr	r7, r7, r12, lsr 2\n\t"
        "orr	r6, r6, lr, lsr 2\n\t"
        "eor	r4, r4, r6\n\t"
        "eor	r5, r5, r7\n\t"
        "lsls	r6, r12, #25\n\t"
        "lsls	r7, lr, #25\n\t"
        "orr	r7, r7, r12, lsr 7\n\t"
        "orr	r6, r6, lr, lsr 7\n\t"
        "ldrd	r12, lr, [%[sha512], #48]\n\t"
        "eor	r4, r4, r6\n\t"
        "eor	r5, r5, r7\n\t"
        "adds	r12, r12, r4\n\t"
        "adc	lr, lr, r5\n\t"
        "ldrd	r6, r7, [%[sha512], #56]\n\t"
        "ldrd	r4, r5, [%[sha512]]\n\t"
        "strd	r12, lr, [%[sha512], #48]\n\t"
        "eor	r6, r6, r4\n\t"
        "eor	r7, r7, r5\n\t"
        "and	r8, r8, r6\n\t"
        "and	r9, r9, r7\n\t"
        "eor	r8, r8, r4\n\t"
        "eor	r9, r9, r5\n\t"
        "ldrd	r4, r5, [%[sha512], #48]\n\t"
        "adds	r4, r4, r8\n\t"
        "adc	r5, r5, r9\n\t"
        "strd	r4, r5, [%[sha512], #48]\n\t"
        "mov	r8, r6\n\t"
        "mov	r9, r7\n\t"
        /* Calc new W[1] */
        "ldrd	r12, lr, [sp, #120]\n\t"
        "lsrs	r4, r12, #19\n\t"
        "lsrs	r5, lr, #19\n\t"
        "orr	r5, r5, r12, lsl 13\n\t"
        "orr	r4, r4, lr, lsl 13\n\t"
        "lsls	r6, r12, #3\n\t"
        "lsls	r7, lr, #3\n\t"
        "orr	r7, r7, r12, lsr 29\n\t"
        "orr	r6, r6, lr, lsr 29\n\t"
        "eor	r5, r5, r7\n\t"
        "eor	r4, r4, r6\n\t"
        "lsrs	r6, r12, #6\n\t"
        "lsrs	r7, lr, #6\n\t"
        "orr	r6, r6, lr, lsl 26\n\t"
        "eor	r5, r5, r7\n\t"
        "eor	r4, r4, r6\n\t"
        "ldrd	r12, lr, [sp, #8]\n\t"
        "ldrd	r6, r7, [sp, #80]\n\t"
        "adds	r12, r12, r4\n\t"
        "adc	lr, lr, r5\n\t"
        "adds	r12, r12, r6\n\t"
        "adc	lr, lr, r7\n\t"
        "strd	r12, lr, [sp, #8]\n\t"
        "ldrd	r12, lr, [sp, #16]\n\t"
        "lsrs	r4, r12, #1\n\t"
        "lsrs	r5, lr, #1\n\t"
        "orr	r5, r5, r12, lsl 31\n\t"
        "orr	r4, r4, lr, lsl 31\n\t"
        "lsrs	r6, r12, #8\n\t"
        "lsrs	r7, lr, #8\n\t"
        "orr	r7, r7, r12, lsl 24\n\t"
        "orr	r6, r6, lr, lsl 24\n\t"
        "eor	r5, r5, r7\n\t"
        "eor	r4, r4, r6\n\t"
        "lsrs	r6, r12, #7\n\t"
        "lsrs	r7, lr, #7\n\t"
        "orr	r6, r6, lr, lsl 25\n\t"
        "eor	r5, r5, r7\n\t"
        "eor	r4, r4, r6\n\t"
        "ldrd	r12, lr, [sp, #8]\n\t"
        "adds	r12, r12, r4\n\t"
        "adc	lr, lr, r5\n\t"
        "strd	r12, lr, [sp, #8]\n\t"
        /* Round 2 */
        "ldrd	r12, lr, [%[sha512], #16]\n\t"
        "lsrs	r4, r12, #14\n\t"
        "lsrs	r5, lr, #14\n\t"
        "orr	r5, r5, r12, lsl 18\n\t"
        "orr	r4, r4, lr, lsl 18\n\t"
        "lsrs	r6, r12, #18\n\t"
        "lsrs	r7, lr, #18\n\t"
        "orr	r7, r7, r12, lsl 14\n\t"
        "orr	r6, r6, lr, lsl 14\n\t"
        "eor	r4, r4, r6\n\t"
        "eor	r5, r5, r7\n\t"
        "lsls	r6, r12, #23\n\t"
        "lsls	r7, lr, #23\n\t"
        "orr	r7, r7, r12, lsr 9\n\t"
        "orr	r6, r6, lr, lsr 9\n\t"
        "ldrd	r12, lr, [%[sha512], #40]\n\t"
        "eor	r4, r4, r6\n\t"
        "eor	r5, r5, r7\n\t"
        "adds	r12, r12, r4\n\t"
        "adc	lr, lr, r5\n\t"
        "strd	r12, lr, [%[sha512], #40]\n\t"
        "ldrd	r12, lr, [%[sha512], #16]\n\t"
        "ldrd	r4, r5, [%[sha512], #24]\n\t"
        "ldrd	r6, r7, [%[sha512], #32]\n\t"
        "eor	r4, r4, r6\n\t"
        "eor	r5, r5, r7\n\t"
        "and	r4, r4, r12\n\t"
        "and	r5, r5, lr\n\t"
        "eor	r4, r4, r6\n\t"
        "eor	r5, r5, r7\n\t"
        "ldrd	r12, lr, [%[sha512], #40]\n\t"
        "ldrd	r6, r7, [sp, #16]\n\t"
        "adds	r12, r12, r4\n\t"
        "adc	lr, lr, r5\n\t"
        "ldrd	r4, r5, [r3, #16]\n\t"
        "adds	r12, r12, r6\n\t"
        "adc	lr, lr, r7\n\t"
        "ldrd	r6, r7, [%[sha512], #8]\n\t"
        "adds	r12, r12, r4\n\t"
        "adc	lr, lr, r5\n\t"
        "strd	r12, lr, [%[sha512], #40]\n\t"
        "adds	r6, r6, r12\n\t"
        "adc	r7, r7, lr\n\t"
        "ldrd	r12, lr, [%[sha512], #48]\n\t"
        "strd	r6, r7, [%[sha512], #8]\n\t"
        "lsrs	r4, r12, #28\n\t"
        "lsrs	r5, lr, #28\n\t"
        "orr	r5, r5, r12, lsl 4\n\t"
        "orr	r4, r4, lr, lsl 4\n\t"
        "lsls	r6, r12, #30\n\t"
        "lsls	r7, lr, #30\n\t"
        "orr	r7, r7, r12, lsr 2\n\t"
        "orr	r6, r6, lr, lsr 2\n\t"
        "eor	r4, r4, r6\n\t"
        "eor	r5, r5, r7\n\t"
        "lsls	r6, r12, #25\n\t"
        "lsls	r7, lr, #25\n\t"
        "orr	r7, r7, r12, lsr 7\n\t"
        "orr	r6, r6, lr, lsr 7\n\t"
        "ldrd	r12, lr, [%[sha512], #40]\n\t"
        "eor	r4, r4, r6\n\t"
        "eor	r5, r5, r7\n\t"
        "adds	r12, r12, r4\n\t"
        "adc	lr, lr, r5\n\t"
        "ldrd	r6, r7, [%[sha512], #48]\n\t"
        "ldrd	r4, r5, [%[sha512], #56]\n\t"
        "strd	r12, lr, [%[sha512], #40]\n\t"
        "eor	r6, r6, r4\n\t"
        "eor	r7, r7, r5\n\t"
        "and	r8, r8, r6\n\t"
        "and	r9, r9, r7\n\t"
        "eor	r8, r8, r4\n\t"
        "eor	r9, r9, r5\n\t"
        "ldrd	r4, r5, [%[sha512], #40]\n\t"
        "adds	r4, r4, r8\n\t"
        "adc	r5, r5, r9\n\t"
        "strd	r4, r5, [%[sha512], #40]\n\t"
        "mov	r8, r6\n\t"
        "mov	r9, r7\n\t"
        /* Calc new W[2] */
        "ldrd	r12, lr, [sp]\n\t"
        "lsrs	r4, r12, #19\n\t"
        "lsrs	r5, lr, #19\n\t"
        "orr	r5, r5, r12, lsl 13\n\t"
        "orr	r4, r4, lr, lsl 13\n\t"
        "lsls	r6, r12, #3\n\t"
        "lsls	r7, lr, #3\n\t"
        "orr	r7, r7, r12, lsr 29\n\t"
        "orr	r6, r6, lr, lsr 29\n\t"
        "eor	r5, r5, r7\n\t"
        "eor	r4, r4, r6\n\t"
        "lsrs	r6, r12, #6\n\t"
        "lsrs	r7, lr, #6\n\t"
        "orr	r6, r6, lr, lsl 26\n\t"
        "eor	r5, r5, r7\n\t"
        "eor	r4, r4, r6\n\t"
        "ldrd	r12, lr, [sp, #16]\n\t"
        "ldrd	r6, r7, [sp, #88]\n\t"
        "adds	r12, r12, r4\n\t"
        "adc	lr, lr, r5\n\t"
        "adds	r12, r12, r6\n\t"
        "adc	lr, lr, r7\n\t"
        "strd	r12, lr, [sp, #16]\n\t"
        "ldrd	r12, lr, [sp, #24]\n\t"
        "lsrs	r4, r12, #1\n\t"
        "lsrs	r5, lr, #1\n\t"
        "orr	r5, r5, r12, lsl 31\n\t"
        "orr	r4, r4, lr, lsl 31\n\t"
        "lsrs	r6, r12, #8\n\t"
        "lsrs	r7, lr, #8\n\t"
        "orr	r7, r7, r12, lsl 24\n\t"
        "orr	r6, r6, lr, lsl 24\n\t"
        "eor	r5, r5, r7\n\t"
        "eor	r4, r4, r6\n\t"
        "lsrs	r6, r12, #7\n\t"
        "lsrs	r7, lr, #7\n\t"
        "orr	r6, r6, lr, lsl 25\n\t"
        "eor	r5, r5, r7\n\t"
        "eor	r4, r4, r6\n\t"
        "ldrd	r12, lr, [sp, #16]\n\t"
        "adds	r12, r12, r4\n\t"
        "adc	lr, lr, r5\n\t"
        "strd	r12, lr, [sp, #16]\n\t"
        /* Round 3 */
        "ldrd	r12, lr, [%[sha512], #8]\n\t"
        "lsrs	r4, r12, #14\n\t"
        "lsrs	r5, lr, #14\n\t"
        "orr	r5, r5, r12, lsl 18\n\t"
        "orr	r4, r4, lr, lsl 18\n\t"
        "lsrs	r6, r12, #18\n\t"
        "lsrs	r7, lr, #18\n\t"
        "orr	r7, r7, r12, lsl 14\n\t"
        "orr	r6, r6, lr, lsl 14\n\t"
        "eor	r4, r4, r6\n\t"
        "eor	r5, r5, r7\n\t"
        "lsls	r6, r12, #23\n\t"
        "lsls	r7, lr, #23\n\t"
        "orr	r7, r7, r12, lsr 9\n\t"
        "orr	r6, r6, lr, lsr 9\n\t"
        "ldrd	r12, lr, [%[sha512], #32]\n\t"
        "eor	r4, r4, r6\n\t"
        "eor	r5, r5, r7\n\t"
        "adds	r12, r12, r4\n\t"
        "adc	lr, lr, r5\n\t"
        "strd	r12, lr, [%[sha512], #32]\n\t"
        "ldrd	r12, lr, [%[sha512], #8]\n\t"
        "ldrd	r4, r5, [%[sha512], #16]\n\t"
        "ldrd	r6, r7, [%[sha512], #24]\n\t"
        "eor	r4, r4, r6\n\t"
        "eor	r5, r5, r7\n\t"
        "and	r4, r4, r12\n\t"
        "and	r5, r5, lr\n\t"
        "eor	r4, r4, r6\n\t"
        "eor	r5, r5, r7\n\t"
        "ldrd	r12, lr, [%[sha512], #32]\n\t"
        "ldrd	r6, r7, [sp, #24]\n\t"
        "adds	r12, r12, r4\n\t"
        "adc	lr, lr, r5\n\t"
        "ldrd	r4, r5, [r3, #24]\n\t"
        "adds	r12, r12, r6\n\t"
        "adc	lr, lr, r7\n\t"
        "ldrd	r6, r7, [%[sha512]]\n\t"
        "adds	r12, r12, r4\n\t"
        "adc	lr, lr, r5\n\t"
        "strd	r12, lr, [%[sha512], #32]\n\t"
        "adds	r6, r6, r12\n\t"
        "adc	r7, r7, lr\n\t"
        "ldrd	r12, lr, [%[sha512], #40]\n\t"
        "strd	r6, r7, [%[sha512]]\n\t"
        "lsrs	r4, r12, #28\n\t"
        "lsrs	r5, lr, #28\n\t"
        "orr	r5, r5, r12, lsl 4\n\t"
        "orr	r4, r4, lr, lsl 4\n\t"
        "lsls	r6, r12, #30\n\t"
        "lsls	r7, lr, #30\n\t"
        "orr	r7, r7, r12, lsr 2\n\t"
        "orr	r6, r6, lr, lsr 2\n\t"
        "eor	r4, r4, r6\n\t"
        "eor	r5, r5, r7\n\t"
        "lsls	r6, r12, #25\n\t"
        "lsls	r7, lr, #25\n\t"
        "orr	r7, r7, r12, lsr 7\n\t"
        "orr	r6, r6, lr, lsr 7\n\t"
        "ldrd	r12, lr, [%[sha512], #32]\n\t"
        "eor	r4, r4, r6\n\t"
        "eor	r5, r5, r7\n\t"
        "adds	r12, r12, r4\n\t"
        "adc	lr, lr, r5\n\t"
        "ldrd	r6, r7, [%[sha512], #40]\n\t"
        "ldrd	r4, r5, [%[sha512], #48]\n\t"
        "strd	r12, lr, [%[sha512], #32]\n\t"
        "eor	r6, r6, r4\n\t"
        "eor	r7, r7, r5\n\t"
        "and	r8, r8, r6\n\t"
        "and	r9, r9, r7\n\t"
        "eor	r8, r8, r4\n\t"
        "eor	r9, r9, r5\n\t"
        "ldrd	r4, r5, [%[sha512], #32]\n\t"
        "adds	r4, r4, r8\n\t"
        "adc	r5, r5, r9\n\t"
        "strd	r4, r5, [%[sha512], #32]\n\t"
        "mov	r8, r6\n\t"
        "mov	r9, r7\n\t"
        /* Calc new W[3] */
        "ldrd	r12, lr, [sp, #8]\n\t"
        "lsrs	r4, r12, #19\n\t"
        "lsrs	r5, lr, #19\n\t"
        "orr	r5, r5, r12, lsl 13\n\t"
        "orr	r4, r4, lr, lsl 13\n\t"
        "lsls	r6, r12, #3\n\t"
        "lsls	r7, lr, #3\n\t"
        "orr	r7, r7, r12, lsr 29\n\t"
        "orr	r6, r6, lr, lsr 29\n\t"
        "eor	r5, r5, r7\n\t"
        "eor	r4, r4, r6\n\t"
        "lsrs	r6, r12, #6\n\t"
        "lsrs	r7, lr, #6\n\t"
        "orr	r6, r6, lr, lsl 26\n\t"
        "eor	r5, r5, r7\n\t"
        "eor	r4, r4, r6\n\t"
        "ldrd	r12, lr, [sp, #24]\n\t"
        "ldrd	r6, r7, [sp, #96]\n\t"
        "adds	r12, r12, r4\n\t"
        "adc	lr, lr, r5\n\t"
        "adds	r12, r12, r6\n\t"
        "adc	lr, lr, r7\n\t"
        "strd	r12, lr, [sp, #24]\n\t"
        "ldrd	r12, lr, [sp, #32]\n\t"
        "lsrs	r4, r12, #1\n\t"
        "lsrs	r5, lr, #1\n\t"
        "orr	r5, r5, r12, lsl 31\n\t"
        "orr	r4, r4, lr, lsl 31\n\t"
        "lsrs	r6, r12, #8\n\t"
        "lsrs	r7, lr, #8\n\t"
        "orr	r7, r7, r12, lsl 24\n\t"
        "orr	r6, r6, lr, lsl 24\n\t"
        "eor	r5, r5, r7\n\t"
        "eor	r4, r4, r6\n\t"
        "lsrs	r6, r12, #7\n\t"
        "lsrs	r7, lr, #7\n\t"
        "orr	r6, r6, lr, lsl 25\n\t"
        "eor	r5, r5, r7\n\t"
        "eor	r4, r4, r6\n\t"
        "ldrd	r12, lr, [sp, #24]\n\t"
        "adds	r12, r12, r4\n\t"
        "adc	lr, lr, r5\n\t"
        "strd	r12, lr, [sp, #24]\n\t"
        /* Round 4 */
        "ldrd	r12, lr, [%[sha512]]\n\t"
        "lsrs	r4, r12, #14\n\t"
        "lsrs	r5, lr, #14\n\t"
        "orr	r5, r5, r12, lsl 18\n\t"
        "orr	r4, r4, lr, lsl 18\n\t"
        "lsrs	r6, r12, #18\n\t"
        "lsrs	r7, lr, #18\n\t"
        "orr	r7, r7, r12, lsl 14\n\t"
        "orr	r6, r6, lr, lsl 14\n\t"
        "eor	r4, r4, r6\n\t"
        "eor	r5, r5, r7\n\t"
        "lsls	r6, r12, #23\n\t"
        "lsls	r7, lr, #23\n\t"
        "orr	r7, r7, r12, lsr 9\n\t"
        "orr	r6, r6, lr, lsr 9\n\t"
        "ldrd	r12, lr, [%[sha512], #24]\n\t"
        "eor	r4, r4, r6\n\t"
        "eor	r5, r5, r7\n\t"
        "adds	r12, r12, r4\n\t"
        "adc	lr, lr, r5\n\t"
        "strd	r12, lr, [%[sha512], #24]\n\t"
        "ldrd	r12, lr, [%[sha512]]\n\t"
        "ldrd	r4, r5, [%[sha512], #8]\n\t"
        "ldrd	r6, r7, [%[sha512], #16]\n\t"
        "eor	r4, r4, r6\n\t"
        "eor	r5, r5, r7\n\t"
        "and	r4, r4, r12\n\t"
        "and	r5, r5, lr\n\t"
        "eor	r4, r4, r6\n\t"
        "eor	r5, r5, r7\n\t"
        "ldrd	r12, lr, [%[sha512], #24]\n\t"
        "ldrd	r6, r7, [sp, #32]\n\t"
        "adds	r12, r12, r4\n\t"
        "adc	lr, lr, r5\n\t"
        "ldrd	r4, r5, [r3, #32]\n\t"
        "adds	r12, r12, r6\n\t"
        "adc	lr, lr, r7\n\t"
        "ldrd	r6, r7, [%[sha512], #56]\n\t"
        "adds	r12, r12, r4\n\t"
        "adc	lr, lr, r5\n\t"
        "strd	r12, lr, [%[sha512], #24]\n\t"
        "adds	r6, r6, r12\n\t"
        "adc	r7, r7, lr\n\t"
        "ldrd	r12, lr, [%[sha512], #32]\n\t"
        "strd	r6, r7, [%[sha512], #56]\n\t"
        "lsrs	r4, r12, #28\n\t"
        "lsrs	r5, lr, #28\n\t"
        "orr	r5, r5, r12, lsl 4\n\t"
        "orr	r4, r4, lr, lsl 4\n\t"
        "lsls	r6, r12, #30\n\t"
        "lsls	r7, lr, #30\n\t"
        "orr	r7, r7, r12, lsr 2\n\t"
        "orr	r6, r6, lr, lsr 2\n\t"
        "eor	r4, r4, r6\n\t"
        "eor	r5, r5, r7\n\t"
        "lsls	r6, r12, #25\n\t"
        "lsls	r7, lr, #25\n\t"
        "orr	r7, r7, r12, lsr 7\n\t"
        "orr	r6, r6, lr, lsr 7\n\t"
        "ldrd	r12, lr, [%[sha512], #24]\n\t"
        "eor	r4, r4, r6\n\t"
        "eor	r5, r5, r7\n\t"
        "adds	r12, r12, r4\n\t"
        "adc	lr, lr, r5\n\t"
        "ldrd	r6, r7, [%[sha512], #32]\n\t"
        "ldrd	r4, r5, [%[sha512], #40]\n\t"
        "strd	r12, lr, [%[sha512], #24]\n\t"
        "eor	r6, r6, r4\n\t"
        "eor	r7, r7, r5\n\t"
        "and	r8, r8, r6\n\t"
        "and	r9, r9, r7\n\t"
        "eor	r8, r8, r4\n\t"
        "eor	r9, r9, r5\n\t"
        "ldrd	r4, r5, [%[sha512], #24]\n\t"
        "adds	r4, r4, r8\n\t"
        "adc	r5, r5, r9\n\t"
        "strd	r4, r5, [%[sha512], #24]\n\t"
        "mov	r8, r6\n\t"
        "mov	r9, r7\n\t"
        /* Calc new W[4] */
        "ldrd	r12, lr, [sp, #16]\n\t"
        "lsrs	r4, r12, #19\n\t"
        "lsrs	r5, lr, #19\n\t"
        "orr	r5, r5, r12, lsl 13\n\t"
        "orr	r4, r4, lr, lsl 13\n\t"
        "lsls	r6, r12, #3\n\t"
        "lsls	r7, lr, #3\n\t"
        "orr	r7, r7, r12, lsr 29\n\t"
        "orr	r6, r6, lr, lsr 29\n\t"
        "eor	r5, r5, r7\n\t"
        "eor	r4, r4, r6\n\t"
        "lsrs	r6, r12, #6\n\t"
        "lsrs	r7, lr, #6\n\t"
        "orr	r6, r6, lr, lsl 26\n\t"
        "eor	r5, r5, r7\n\t"
        "eor	r4, r4, r6\n\t"
        "ldrd	r12, lr, [sp, #32]\n\t"
        "ldrd	r6, r7, [sp, #104]\n\t"
        "adds	r12, r12, r4\n\t"
        "adc	lr, lr, r5\n\t"
        "adds	r12, r12, r6\n\t"
        "adc	lr, lr, r7\n\t"
        "strd	r12, lr, [sp, #32]\n\t"
        "ldrd	r12, lr, [sp, #40]\n\t"
        "lsrs	r4, r12, #1\n\t"
        "lsrs	r5, lr, #1\n\t"
        "orr	r5, r5, r12, lsl 31\n\t"
        "orr	r4, r4, lr, lsl 31\n\t"
        "lsrs	r6, r12, #8\n\t"
        "lsrs	r7, lr, #8\n\t"
        "orr	r7, r7, r12, lsl 24\n\t"
        "orr	r6, r6, lr, lsl 24\n\t"
        "eor	r5, r5, r7\n\t"
        "eor	r4, r4, r6\n\t"
        "lsrs	r6, r12, #7\n\t"
        "lsrs	r7, lr, #7\n\t"
        "orr	r6, r6, lr, lsl 25\n\t"
        "eor	r5, r5, r7\n\t"
        "eor	r4, r4, r6\n\t"
        "ldrd	r12, lr, [sp, #32]\n\t"
        "adds	r12, r12, r4\n\t"
        "adc	lr, lr, r5\n\t"
        "strd	r12, lr, [sp, #32]\n\t"
        /* Round 5 */
        "ldrd	r12, lr, [%[sha512], #56]\n\t"
        "lsrs	r4, r12, #14\n\t"
        "lsrs	r5, lr, #14\n\t"
        "orr	r5, r5, r12, lsl 18\n\t"
        "orr	r4, r4, lr, lsl 18\n\t"
        "lsrs	r6, r12, #18\n\t"
        "lsrs	r7, lr, #18\n\t"
        "orr	r7, r7, r12, lsl 14\n\t"
        "orr	r6, r6, lr, lsl 14\n\t"
        "eor	r4, r4, r6\n\t"
        "eor	r5, r5, r7\n\t"
        "lsls	r6, r12, #23\n\t"
        "lsls	r7, lr, #23\n\t"
        "orr	r7, r7, r12, lsr 9\n\t"
        "orr	r6, r6, lr, lsr 9\n\t"
        "ldrd	r12, lr, [%[sha512], #16]\n\t"
        "eor	r4, r4, r6\n\t"
        "eor	r5, r5, r7\n\t"
        "adds	r12, r12, r4\n\t"
        "adc	lr, lr, r5\n\t"
        "strd	r12, lr, [%[sha512], #16]\n\t"
        "ldrd	r12, lr, [%[sha512], #56]\n\t"
        "ldrd	r4, r5, [%[sha512]]\n\t"
        "ldrd	r6, r7, [%[sha512], #8]\n\t"
        "eor	r4, r4, r6\n\t"
        "eor	r5, r5, r7\n\t"
        "and	r4, r4, r12\n\t"
        "and	r5, r5, lr\n\t"
        "eor	r4, r4, r6\n\t"
        "eor	r5, r5, r7\n\t"
        "ldrd	r12, lr, [%[sha512], #16]\n\t"
        "ldrd	r6, r7, [sp, #40]\n\t"
        "adds	r12, r12, r4\n\t"
        "adc	lr, lr, r5\n\t"
        "ldrd	r4, r5, [r3, #40]\n\t"
        "adds	r12, r12, r6\n\t"
        "adc	lr, lr, r7\n\t"
        "ldrd	r6, r7, [%[sha512], #48]\n\t"
        "adds	r12, r12, r4\n\t"
        "adc	lr, lr, r5\n\t"
        "strd	r12, lr, [%[sha512], #16]\n\t"
        "adds	r6, r6, r12\n\t"
        "adc	r7, r7, lr\n\t"
        "ldrd	r12, lr, [%[sha512], #24]\n\t"
        "strd	r6, r7, [%[sha512], #48]\n\t"
        "lsrs	r4, r12, #28\n\t"
        "lsrs	r5, lr, #28\n\t"
        "orr	r5, r5, r12, lsl 4\n\t"
        "orr	r4, r4, lr, lsl 4\n\t"
        "lsls	r6, r12, #30\n\t"
        "lsls	r7, lr, #30\n\t"
        "orr	r7, r7, r12, lsr 2\n\t"
        "orr	r6, r6, lr, lsr 2\n\t"
        "eor	r4, r4, r6\n\t"
        "eor	r5, r5, r7\n\t"
        "lsls	r6, r12, #25\n\t"
        "lsls	r7, lr, #25\n\t"
        "orr	r7, r7, r12, lsr 7\n\t"
        "orr	r6, r6, lr, lsr 7\n\t"
        "ldrd	r12, lr, [%[sha512], #16]\n\t"
        "eor	r4, r4, r6\n\t"
        "eor	r5, r5, r7\n\t"
        "adds	r12, r12, r4\n\t"
        "adc	lr, lr, r5\n\t"
        "ldrd	r6, r7, [%[sha512], #24]\n\t"
        "ldrd	r4, r5, [%[sha512], #32]\n\t"
        "strd	r12, lr, [%[sha512], #16]\n\t"
        "eor	r6, r6, r4\n\t"
        "eor	r7, r7, r5\n\t"
        "and	r8, r8, r6\n\t"
        "and	r9, r9, r7\n\t"
        "eor	r8, r8, r4\n\t"
        "eor	r9, r9, r5\n\t"
        "ldrd	r4, r5, [%[sha512], #16]\n\t"
        "adds	r4, r4, r8\n\t"
        "adc	r5, r5, r9\n\t"
        "strd	r4, r5, [%[sha512], #16]\n\t"
        "mov	r8, r6\n\t"
        "mov	r9, r7\n\t"
        /* Calc new W[5] */
        "ldrd	r12, lr, [sp, #24]\n\t"
        "lsrs	r4, r12, #19\n\t"
        "lsrs	r5, lr, #19\n\t"
        "orr	r5, r5, r12, lsl 13\n\t"
        "orr	r4, r4, lr, lsl 13\n\t"
        "lsls	r6, r12, #3\n\t"
        "lsls	r7, lr, #3\n\t"
        "orr	r7, r7, r12, lsr 29\n\t"
        "orr	r6, r6, lr, lsr 29\n\t"
        "eor	r5, r5, r7\n\t"
        "eor	r4, r4, r6\n\t"
        "lsrs	r6, r12, #6\n\t"
        "lsrs	r7, lr, #6\n\t"
        "orr	r6, r6, lr, lsl 26\n\t"
        "eor	r5, r5, r7\n\t"
        "eor	r4, r4, r6\n\t"
        "ldrd	r12, lr, [sp, #40]\n\t"
        "ldrd	r6, r7, [sp, #112]\n\t"
        "adds	r12, r12, r4\n\t"
        "adc	lr, lr, r5\n\t"
        "adds	r12, r12, r6\n\t"
        "adc	lr, lr, r7\n\t"
        "strd	r12, lr, [sp, #40]\n\t"
        "ldrd	r12, lr, [sp, #48]\n\t"
        "lsrs	r4, r12, #1\n\t"
        "lsrs	r5, lr, #1\n\t"
        "orr	r5, r5, r12, lsl 31\n\t"
        "orr	r4, r4, lr, lsl 31\n\t"
        "lsrs	r6, r12, #8\n\t"
        "lsrs	r7, lr, #8\n\t"
        "orr	r7, r7, r12, lsl 24\n\t"
        "orr	r6, r6, lr, lsl 24\n\t"
        "eor	r5, r5, r7\n\t"
        "eor	r4, r4, r6\n\t"
        "lsrs	r6, r12, #7\n\t"
        "lsrs	r7, lr, #7\n\t"
        "orr	r6, r6, lr, lsl 25\n\t"
        "eor	r5, r5, r7\n\t"
        "eor	r4, r4, r6\n\t"
        "ldrd	r12, lr, [sp, #40]\n\t"
        "adds	r12, r12, r4\n\t"
        "adc	lr, lr, r5\n\t"
        "strd	r12, lr, [sp, #40]\n\t"
        /* Round 6 */
        "ldrd	r12, lr, [%[sha512], #48]\n\t"
        "lsrs	r4, r12, #14\n\t"
        "lsrs	r5, lr, #14\n\t"
        "orr	r5, r5, r12, lsl 18\n\t"
        "orr	r4, r4, lr, lsl 18\n\t"
        "lsrs	r6, r12, #18\n\t"
        "lsrs	r7, lr, #18\n\t"
        "orr	r7, r7, r12, lsl 14\n\t"
        "orr	r6, r6, lr, lsl 14\n\t"
        "eor	r4, r4, r6\n\t"
        "eor	r5, r5, r7\n\t"
        "lsls	r6, r12, #23\n\t"
        "lsls	r7, lr, #23\n\t"
        "orr	r7, r7, r12, lsr 9\n\t"
        "orr	r6, r6, lr, lsr 9\n\t"
        "ldrd	r12, lr, [%[sha512], #8]\n\t"
        "eor	r4, r4, r6\n\t"
        "eor	r5, r5, r7\n\t"
        "adds	r12, r12, r4\n\t"
        "adc	lr, lr, r5\n\t"
        "strd	r12, lr, [%[sha512], #8]\n\t"
        "ldrd	r12, lr, [%[sha512], #48]\n\t"
        "ldrd	r4, r5, [%[sha512], #56]\n\t"
        "ldrd	r6, r7, [%[sha512]]\n\t"
        "eor	r4, r4, r6\n\t"
        "eor	r5, r5, r7\n\t"
        "and	r4, r4, r12\n\t"
        "and	r5, r5, lr\n\t"
        "eor	r4, r4, r6\n\t"
        "eor	r5, r5, r7\n\t"
        "ldrd	r12, lr, [%[sha512], #8]\n\t"
        "ldrd	r6, r7, [sp, #48]\n\t"
        "adds	r12, r12, r4\n\t"
        "adc	lr, lr, r5\n\t"
        "ldrd	r4, r5, [r3, #48]\n\t"
        "adds	r12, r12, r6\n\t"
        "adc	lr, lr, r7\n\t"
        "ldrd	r6, r7, [%[sha512], #40]\n\t"
        "adds	r12, r12, r4\n\t"
        "adc	lr, lr, r5\n\t"
        "strd	r12, lr, [%[sha512], #8]\n\t"
        "adds	r6, r6, r12\n\t"
        "adc	r7, r7, lr\n\t"
        "ldrd	r12, lr, [%[sha512], #16]\n\t"
        "strd	r6, r7, [%[sha512], #40]\n\t"
        "lsrs	r4, r12, #28\n\t"
        "lsrs	r5, lr, #28\n\t"
        "orr	r5, r5, r12, lsl 4\n\t"
        "orr	r4, r4, lr, lsl 4\n\t"
        "lsls	r6, r12, #30\n\t"
        "lsls	r7, lr, #30\n\t"
        "orr	r7, r7, r12, lsr 2\n\t"
        "orr	r6, r6, lr, lsr 2\n\t"
        "eor	r4, r4, r6\n\t"
        "eor	r5, r5, r7\n\t"
        "lsls	r6, r12, #25\n\t"
        "lsls	r7, lr, #25\n\t"
        "orr	r7, r7, r12, lsr 7\n\t"
        "orr	r6, r6, lr, lsr 7\n\t"
        "ldrd	r12, lr, [%[sha512], #8]\n\t"
        "eor	r4, r4, r6\n\t"
        "eor	r5, r5, r7\n\t"
        "adds	r12, r12, r4\n\t"
        "adc	lr, lr, r5\n\t"
        "ldrd	r6, r7, [%[sha512], #16]\n\t"
        "ldrd	r4, r5, [%[sha512], #24]\n\t"
        "strd	r12, lr, [%[sha512], #8]\n\t"
        "eor	r6, r6, r4\n\t"
        "eor	r7, r7, r5\n\t"
        "and	r8, r8, r6\n\t"
        "and	r9, r9, r7\n\t"
        "eor	r8, r8, r4\n\t"
        "eor	r9, r9, r5\n\t"
        "ldrd	r4, r5, [%[sha512], #8]\n\t"
        "adds	r4, r4, r8\n\t"
        "adc	r5, r5, r9\n\t"
        "strd	r4, r5, [%[sha512], #8]\n\t"
        "mov	r8, r6\n\t"
        "mov	r9, r7\n\t"
        /* Calc new W[6] */
        "ldrd	r12, lr, [sp, #32]\n\t"
        "lsrs	r4, r12, #19\n\t"
        "lsrs	r5, lr, #19\n\t"
        "orr	r5, r5, r12, lsl 13\n\t"
        "orr	r4, r4, lr, lsl 13\n\t"
        "lsls	r6, r12, #3\n\t"
        "lsls	r7, lr, #3\n\t"
        "orr	r7, r7, r12, lsr 29\n\t"
        "orr	r6, r6, lr, lsr 29\n\t"
        "eor	r5, r5, r7\n\t"
        "eor	r4, r4, r6\n\t"
        "lsrs	r6, r12, #6\n\t"
        "lsrs	r7, lr, #6\n\t"
        "orr	r6, r6, lr, lsl 26\n\t"
        "eor	r5, r5, r7\n\t"
        "eor	r4, r4, r6\n\t"
        "ldrd	r12, lr, [sp, #48]\n\t"
        "ldrd	r6, r7, [sp, #120]\n\t"
        "adds	r12, r12, r4\n\t"
        "adc	lr, lr, r5\n\t"
        "adds	r12, r12, r6\n\t"
        "adc	lr, lr, r7\n\t"
        "strd	r12, lr, [sp, #48]\n\t"
        "ldrd	r12, lr, [sp, #56]\n\t"
        "lsrs	r4, r12, #1\n\t"
        "lsrs	r5, lr, #1\n\t"
        "orr	r5, r5, r12, lsl 31\n\t"
        "orr	r4, r4, lr, lsl 31\n\t"
        "lsrs	r6, r12, #8\n\t"
        "lsrs	r7, lr, #8\n\t"
        "orr	r7, r7, r12, lsl 24\n\t"
        "orr	r6, r6, lr, lsl 24\n\t"
        "eor	r5, r5, r7\n\t"
        "eor	r4, r4, r6\n\t"
        "lsrs	r6, r12, #7\n\t"
        "lsrs	r7, lr, #7\n\t"
        "orr	r6, r6, lr, lsl 25\n\t"
        "eor	r5, r5, r7\n\t"
        "eor	r4, r4, r6\n\t"
        "ldrd	r12, lr, [sp, #48]\n\t"
        "adds	r12, r12, r4\n\t"
        "adc	lr, lr, r5\n\t"
        "strd	r12, lr, [sp, #48]\n\t"
        /* Round 7 */
        "ldrd	r12, lr, [%[sha512], #40]\n\t"
        "lsrs	r4, r12, #14\n\t"
        "lsrs	r5, lr, #14\n\t"
        "orr	r5, r5, r12, lsl 18\n\t"
        "orr	r4, r4, lr, lsl 18\n\t"
        "lsrs	r6, r12, #18\n\t"
        "lsrs	r7, lr, #18\n\t"
        "orr	r7, r7, r12, lsl 14\n\t"
        "orr	r6, r6, lr, lsl 14\n\t"
        "eor	r4, r4, r6\n\t"
        "eor	r5, r5, r7\n\t"
        "lsls	r6, r12, #23\n\t"
        "lsls	r7, lr, #23\n\t"
        "orr	r7, r7, r12, lsr 9\n\t"
        "orr	r6, r6, lr, lsr 9\n\t"
        "ldrd	r12, lr, [%[sha512]]\n\t"
        "eor	r4, r4, r6\n\t"
        "eor	r5, r5, r7\n\t"
        "adds	r12, r12, r4\n\t"
        "adc	lr, lr, r5\n\t"
        "strd	r12, lr, [%[sha512]]\n\t"
        "ldrd	r12, lr, [%[sha512], #40]\n\t"
        "ldrd	r4, r5, [%[sha512], #48]\n\t"
        "ldrd	r6, r7, [%[sha512], #56]\n\t"
        "eor	r4, r4, r6\n\t"
        "eor	r5, r5, r7\n\t"
        "and	r4, r4, r12\n\t"
        "and	r5, r5, lr\n\t"
        "eor	r4, r4, r6\n\t"
        "eor	r5, r5, r7\n\t"
        "ldrd	r12, lr, [%[sha512]]\n\t"
        "ldrd	r6, r7, [sp, #56]\n\t"
        "adds	r12, r12, r4\n\t"
        "adc	lr, lr, r5\n\t"
        "ldrd	r4, r5, [r3, #56]\n\t"
        "adds	r12, r12, r6\n\t"
        "adc	lr, lr, r7\n\t"
        "ldrd	r6, r7, [%[sha512], #32]\n\t"
        "adds	r12, r12, r4\n\t"
        "adc	lr, lr, r5\n\t"
        "strd	r12, lr, [%[sha512]]\n\t"
        "adds	r6, r6, r12\n\t"
        "adc	r7, r7, lr\n\t"
        "ldrd	r12, lr, [%[sha512], #8]\n\t"
        "strd	r6, r7, [%[sha512], #32]\n\t"
        "lsrs	r4, r12, #28\n\t"
        "lsrs	r5, lr, #28\n\t"
        "orr	r5, r5, r12, lsl 4\n\t"
        "orr	r4, r4, lr, lsl 4\n\t"
        "lsls	r6, r12, #30\n\t"
        "lsls	r7, lr, #30\n\t"
        "orr	r7, r7, r12, lsr 2\n\t"
        "orr	r6, r6, lr, lsr 2\n\t"
        "eor	r4, r4, r6\n\t"
        "eor	r5, r5, r7\n\t"
        "lsls	r6, r12, #25\n\t"
        "lsls	r7, lr, #25\n\t"
        "orr	r7, r7, r12, lsr 7\n\t"
        "orr	r6, r6, lr, lsr 7\n\t"
        "ldrd	r12, lr, [%[sha512]]\n\t"
        "eor	r4, r4, r6\n\t"
        "eor	r5, r5, r7\n\t"
        "adds	r12, r12, r4\n\t"
        "adc	lr, lr, r5\n\t"
        "ldrd	r6, r7, [%[sha512], #8]\n\t"
        "ldrd	r4, r5, [%[sha512], #16]\n\t"
        "strd	r12, lr, [%[sha512]]\n\t"
        "eor	r6, r6, r4\n\t"
        "eor	r7, r7, r5\n\t"
        "and	r8, r8, r6\n\t"
        "and	r9, r9, r7\n\t"
        "eor	r8, r8, r4\n\t"
        "eor	r9, r9, r5\n\t"
        "ldrd	r4, r5, [%[sha512]]\n\t"
        "adds	r4, r4, r8\n\t"
        "adc	r5, r5, r9\n\t"
        "strd	r4, r5, [%[sha512]]\n\t"
        "mov	r8, r6\n\t"
        "mov	r9, r7\n\t"
        /* Calc new W[7] */
        "ldrd	r12, lr, [sp, #40]\n\t"
        "lsrs	r4, r12, #19\n\t"
        "lsrs	r5, lr, #19\n\t"
        "orr	r5, r5, r12, lsl 13\n\t"
        "orr	r4, r4, lr, lsl 13\n\t"
        "lsls	r6, r12, #3\n\t"
        "lsls	r7, lr, #3\n\t"
        "orr	r7, r7, r12, lsr 29\n\t"
        "orr	r6, r6, lr, lsr 29\n\t"
        "eor	r5, r5, r7\n\t"
        "eor	r4, r4, r6\n\t"
        "lsrs	r6, r12, #6\n\t"
        "lsrs	r7, lr, #6\n\t"
        "orr	r6, r6, lr, lsl 26\n\t"
        "eor	r5, r5, r7\n\t"
        "eor	r4, r4, r6\n\t"
        "ldrd	r12, lr, [sp, #56]\n\t"
        "ldrd	r6, r7, [sp]\n\t"
        "adds	r12, r12, r4\n\t"
        "adc	lr, lr, r5\n\t"
        "adds	r12, r12, r6\n\t"
        "adc	lr, lr, r7\n\t"
        "strd	r12, lr, [sp, #56]\n\t"
        "ldrd	r12, lr, [sp, #64]\n\t"
        "lsrs	r4, r12, #1\n\t"
        "lsrs	r5, lr, #1\n\t"
        "orr	r5, r5, r12, lsl 31\n\t"
        "orr	r4, r4, lr, lsl 31\n\t"
        "lsrs	r6, r12, #8\n\t"
        "lsrs	r7, lr, #8\n\t"
        "orr	r7, r7, r12, lsl 24\n\t"
        "orr	r6, r6, lr, lsl 24\n\t"
        "eor	r5, r5, r7\n\t"
        "eor	r4, r4, r6\n\t"
        "lsrs	r6, r12, #7\n\t"
        "lsrs	r7, lr, #7\n\t"
        "orr	r6, r6, lr, lsl 25\n\t"
        "eor	r5, r5, r7\n\t"
        "eor	r4, r4, r6\n\t"
        "ldrd	r12, lr, [sp, #56]\n\t"
        "adds	r12, r12, r4\n\t"
        "adc	lr, lr, r5\n\t"
        "strd	r12, lr, [sp, #56]\n\t"
        /* Round 8 */
        "ldrd	r12, lr, [%[sha512], #32]\n\t"
        "lsrs	r4, r12, #14\n\t"
        "lsrs	r5, lr, #14\n\t"
        "orr	r5, r5, r12, lsl 18\n\t"
        "orr	r4, r4, lr, lsl 18\n\t"
        "lsrs	r6, r12, #18\n\t"
        "lsrs	r7, lr, #18\n\t"
        "orr	r7, r7, r12, lsl 14\n\t"
        "orr	r6, r6, lr, lsl 14\n\t"
        "eor	r4, r4, r6\n\t"
        "eor	r5, r5, r7\n\t"
        "lsls	r6, r12, #23\n\t"
        "lsls	r7, lr, #23\n\t"
        "orr	r7, r7, r12, lsr 9\n\t"
        "orr	r6, r6, lr, lsr 9\n\t"
        "ldrd	r12, lr, [%[sha512], #56]\n\t"
        "eor	r4, r4, r6\n\t"
        "eor	r5, r5, r7\n\t"
        "adds	r12, r12, r4\n\t"
        "adc	lr, lr, r5\n\t"
        "strd	r12, lr, [%[sha512], #56]\n\t"
        "ldrd	r12, lr, [%[sha512], #32]\n\t"
        "ldrd	r4, r5, [%[sha512], #40]\n\t"
        "ldrd	r6, r7, [%[sha512], #48]\n\t"
        "eor	r4, r4, r6\n\t"
        "eor	r5, r5, r7\n\t"
        "and	r4, r4, r12\n\t"
        "and	r5, r5, lr\n\t"
        "eor	r4, r4, r6\n\t"
        "eor	r5, r5, r7\n\t"
        "ldrd	r12, lr, [%[sha512], #56]\n\t"
        "ldrd	r6, r7, [sp, #64]\n\t"
        "adds	r12, r12, r4\n\t"
        "adc	lr, lr, r5\n\t"
        "ldrd	r4, r5, [r3, #64]\n\t"
        "adds	r12, r12, r6\n\t"
        "adc	lr, lr, r7\n\t"
        "ldrd	r6, r7, [%[sha512], #24]\n\t"
        "adds	r12, r12, r4\n\t"
        "adc	lr, lr, r5\n\t"
        "strd	r12, lr, [%[sha512], #56]\n\t"
        "adds	r6, r6, r12\n\t"
        "adc	r7, r7, lr\n\t"
        "ldrd	r12, lr, [%[sha512]]\n\t"
        "strd	r6, r7, [%[sha512], #24]\n\t"
        "lsrs	r4, r12, #28\n\t"
        "lsrs	r5, lr, #28\n\t"
        "orr	r5, r5, r12, lsl 4\n\t"
        "orr	r4, r4, lr, lsl 4\n\t"
        "lsls	r6, r12, #30\n\t"
        "lsls	r7, lr, #30\n\t"
        "orr	r7, r7, r12, lsr 2\n\t"
        "orr	r6, r6, lr, lsr 2\n\t"
        "eor	r4, r4, r6\n\t"
        "eor	r5, r5, r7\n\t"
        "lsls	r6, r12, #25\n\t"
        "lsls	r7, lr, #25\n\t"
        "orr	r7, r7, r12, lsr 7\n\t"
        "orr	r6, r6, lr, lsr 7\n\t"
        "ldrd	r12, lr, [%[sha512], #56]\n\t"
        "eor	r4, r4, r6\n\t"
        "eor	r5, r5, r7\n\t"
        "adds	r12, r12, r4\n\t"
        "adc	lr, lr, r5\n\t"
        "ldrd	r6, r7, [%[sha512]]\n\t"
        "ldrd	r4, r5, [%[sha512], #8]\n\t"
        "strd	r12, lr, [%[sha512], #56]\n\t"
        "eor	r6, r6, r4\n\t"
        "eor	r7, r7, r5\n\t"
        "and	r8, r8, r6\n\t"
        "and	r9, r9, r7\n\t"
        "eor	r8, r8, r4\n\t"
        "eor	r9, r9, r5\n\t"
        "ldrd	r4, r5, [%[sha512], #56]\n\t"
        "adds	r4, r4, r8\n\t"
        "adc	r5, r5, r9\n\t"
        "strd	r4, r5, [%[sha512], #56]\n\t"
        "mov	r8, r6\n\t"
        "mov	r9, r7\n\t"
        /* Calc new W[8] */
        "ldrd	r12, lr, [sp, #48]\n\t"
        "lsrs	r4, r12, #19\n\t"
        "lsrs	r5, lr, #19\n\t"
        "orr	r5, r5, r12, lsl 13\n\t"
        "orr	r4, r4, lr, lsl 13\n\t"
        "lsls	r6, r12, #3\n\t"
        "lsls	r7, lr, #3\n\t"
        "orr	r7, r7, r12, lsr 29\n\t"
        "orr	r6, r6, lr, lsr 29\n\t"
        "eor	r5, r5, r7\n\t"
        "eor	r4, r4, r6\n\t"
        "lsrs	r6, r12, #6\n\t"
        "lsrs	r7, lr, #6\n\t"
        "orr	r6, r6, lr, lsl 26\n\t"
        "eor	r5, r5, r7\n\t"
        "eor	r4, r4, r6\n\t"
        "ldrd	r12, lr, [sp, #64]\n\t"
        "ldrd	r6, r7, [sp, #8]\n\t"
        "adds	r12, r12, r4\n\t"
        "adc	lr, lr, r5\n\t"
        "adds	r12, r12, r6\n\t"
        "adc	lr, lr, r7\n\t"
        "strd	r12, lr, [sp, #64]\n\t"
        "ldrd	r12, lr, [sp, #72]\n\t"
        "lsrs	r4, r12, #1\n\t"
        "lsrs	r5, lr, #1\n\t"
        "orr	r5, r5, r12, lsl 31\n\t"
        "orr	r4, r4, lr, lsl 31\n\t"
        "lsrs	r6, r12, #8\n\t"
        "lsrs	r7, lr, #8\n\t"
        "orr	r7, r7, r12, lsl 24\n\t"
        "orr	r6, r6, lr, lsl 24\n\t"
        "eor	r5, r5, r7\n\t"
        "eor	r4, r4, r6\n\t"
        "lsrs	r6, r12, #7\n\t"
        "lsrs	r7, lr, #7\n\t"
        "orr	r6, r6, lr, lsl 25\n\t"
        "eor	r5, r5, r7\n\t"
        "eor	r4, r4, r6\n\t"
        "ldrd	r12, lr, [sp, #64]\n\t"
        "adds	r12, r12, r4\n\t"
        "adc	lr, lr, r5\n\t"
        "strd	r12, lr, [sp, #64]\n\t"
        /* Round 9 */
        "ldrd	r12, lr, [%[sha512], #24]\n\t"
        "lsrs	r4, r12, #14\n\t"
        "lsrs	r5, lr, #14\n\t"
        "orr	r5, r5, r12, lsl 18\n\t"
        "orr	r4, r4, lr, lsl 18\n\t"
        "lsrs	r6, r12, #18\n\t"
        "lsrs	r7, lr, #18\n\t"
        "orr	r7, r7, r12, lsl 14\n\t"
        "orr	r6, r6, lr, lsl 14\n\t"
        "eor	r4, r4, r6\n\t"
        "eor	r5, r5, r7\n\t"
        "lsls	r6, r12, #23\n\t"
        "lsls	r7, lr, #23\n\t"
        "orr	r7, r7, r12, lsr 9\n\t"
        "orr	r6, r6, lr, lsr 9\n\t"
        "ldrd	r12, lr, [%[sha512], #48]\n\t"
        "eor	r4, r4, r6\n\t"
        "eor	r5, r5, r7\n\t"
        "adds	r12, r12, r4\n\t"
        "adc	lr, lr, r5\n\t"
        "strd	r12, lr, [%[sha512], #48]\n\t"
        "ldrd	r12, lr, [%[sha512], #24]\n\t"
        "ldrd	r4, r5, [%[sha512], #32]\n\t"
        "ldrd	r6, r7, [%[sha512], #40]\n\t"
        "eor	r4, r4, r6\n\t"
        "eor	r5, r5, r7\n\t"
        "and	r4, r4, r12\n\t"
        "and	r5, r5, lr\n\t"
        "eor	r4, r4, r6\n\t"
        "eor	r5, r5, r7\n\t"
        "ldrd	r12, lr, [%[sha512], #48]\n\t"
        "ldrd	r6, r7, [sp, #72]\n\t"
        "adds	r12, r12, r4\n\t"
        "adc	lr, lr, r5\n\t"
        "ldrd	r4, r5, [r3, #72]\n\t"
        "adds	r12, r12, r6\n\t"
        "adc	lr, lr, r7\n\t"
        "ldrd	r6, r7, [%[sha512], #16]\n\t"
        "adds	r12, r12, r4\n\t"
        "adc	lr, lr, r5\n\t"
        "strd	r12, lr, [%[sha512], #48]\n\t"
        "adds	r6, r6, r12\n\t"
        "adc	r7, r7, lr\n\t"
        "ldrd	r12, lr, [%[sha512], #56]\n\t"
        "strd	r6, r7, [%[sha512], #16]\n\t"
        "lsrs	r4, r12, #28\n\t"
        "lsrs	r5, lr, #28\n\t"
        "orr	r5, r5, r12, lsl 4\n\t"
        "orr	r4, r4, lr, lsl 4\n\t"
        "lsls	r6, r12, #30\n\t"
        "lsls	r7, lr, #30\n\t"
        "orr	r7, r7, r12, lsr 2\n\t"
        "orr	r6, r6, lr, lsr 2\n\t"
        "eor	r4, r4, r6\n\t"
        "eor	r5, r5, r7\n\t"
        "lsls	r6, r12, #25\n\t"
        "lsls	r7, lr, #25\n\t"
        "orr	r7, r7, r12, lsr 7\n\t"
        "orr	r6, r6, lr, lsr 7\n\t"
        "ldrd	r12, lr, [%[sha512], #48]\n\t"
        "eor	r4, r4, r6\n\t"
        "eor	r5, r5, r7\n\t"
        "adds	r12, r12, r4\n\t"
        "adc	lr, lr, r5\n\t"
        "ldrd	r6, r7, [%[sha512], #56]\n\t"
        "ldrd	r4, r5, [%[sha512]]\n\t"
        "strd	r12, lr, [%[sha512], #48]\n\t"
        "eor	r6, r6, r4\n\t"
        "eor	r7, r7, r5\n\t"
        "and	r8, r8, r6\n\t"
        "and	r9, r9, r7\n\t"
        "eor	r8, r8, r4\n\t"
        "eor	r9, r9, r5\n\t"
        "ldrd	r4, r5, [%[sha512], #48]\n\t"
        "adds	r4, r4, r8\n\t"
        "adc	r5, r5, r9\n\t"
        "strd	r4, r5, [%[sha512], #48]\n\t"
        "mov	r8, r6\n\t"
        "mov	r9, r7\n\t"
        /* Calc new W[9] */
        "ldrd	r12, lr, [sp, #56]\n\t"
        "lsrs	r4, r12, #19\n\t"
        "lsrs	r5, lr, #19\n\t"
        "orr	r5, r5, r12, lsl 13\n\t"
        "orr	r4, r4, lr, lsl 13\n\t"
        "lsls	r6, r12, #3\n\t"
        "lsls	r7, lr, #3\n\t"
        "orr	r7, r7, r12, lsr 29\n\t"
        "orr	r6, r6, lr, lsr 29\n\t"
        "eor	r5, r5, r7\n\t"
        "eor	r4, r4, r6\n\t"
        "lsrs	r6, r12, #6\n\t"
        "lsrs	r7, lr, #6\n\t"
        "orr	r6, r6, lr, lsl 26\n\t"
        "eor	r5, r5, r7\n\t"
        "eor	r4, r4, r6\n\t"
        "ldrd	r12, lr, [sp, #72]\n\t"
        "ldrd	r6, r7, [sp, #16]\n\t"
        "adds	r12, r12, r4\n\t"
        "adc	lr, lr, r5\n\t"
        "adds	r12, r12, r6\n\t"
        "adc	lr, lr, r7\n\t"
        "strd	r12, lr, [sp, #72]\n\t"
        "ldrd	r12, lr, [sp, #80]\n\t"
        "lsrs	r4, r12, #1\n\t"
        "lsrs	r5, lr, #1\n\t"
        "orr	r5, r5, r12, lsl 31\n\t"
        "orr	r4, r4, lr, lsl 31\n\t"
        "lsrs	r6, r12, #8\n\t"
        "lsrs	r7, lr, #8\n\t"
        "orr	r7, r7, r12, lsl 24\n\t"
        "orr	r6, r6, lr, lsl 24\n\t"
        "eor	r5, r5, r7\n\t"
        "eor	r4, r4, r6\n\t"
        "lsrs	r6, r12, #7\n\t"
        "lsrs	r7, lr, #7\n\t"
        "orr	r6, r6, lr, lsl 25\n\t"
        "eor	r5, r5, r7\n\t"
        "eor	r4, r4, r6\n\t"
        "ldrd	r12, lr, [sp, #72]\n\t"
        "adds	r12, r12, r4\n\t"
        "adc	lr, lr, r5\n\t"
        "strd	r12, lr, [sp, #72]\n\t"
        /* Round 10 */
        "ldrd	r12, lr, [%[sha512], #16]\n\t"
        "lsrs	r4, r12, #14\n\t"
        "lsrs	r5, lr, #14\n\t"
        "orr	r5, r5, r12, lsl 18\n\t"
        "orr	r4, r4, lr, lsl 18\n\t"
        "lsrs	r6, r12, #18\n\t"
        "lsrs	r7, lr, #18\n\t"
        "orr	r7, r7, r12, lsl 14\n\t"
        "orr	r6, r6, lr, lsl 14\n\t"
        "eor	r4, r4, r6\n\t"
        "eor	r5, r5, r7\n\t"
        "lsls	r6, r12, #23\n\t"
        "lsls	r7, lr, #23\n\t"
        "orr	r7, r7, r12, lsr 9\n\t"
        "orr	r6, r6, lr, lsr 9\n\t"
        "ldrd	r12, lr, [%[sha512], #40]\n\t"
        "eor	r4, r4, r6\n\t"
        "eor	r5, r5, r7\n\t"
        "adds	r12, r12, r4\n\t"
        "adc	lr, lr, r5\n\t"
        "strd	r12, lr, [%[sha512], #40]\n\t"
        "ldrd	r12, lr, [%[sha512], #16]\n\t"
        "ldrd	r4, r5, [%[sha512], #24]\n\t"
        "ldrd	r6, r7, [%[sha512], #32]\n\t"
        "eor	r4, r4, r6\n\t"
        "eor	r5, r5, r7\n\t"
        "and	r4, r4, r12\n\t"
        "and	r5, r5, lr\n\t"
        "eor	r4, r4, r6\n\t"
        "eor	r5, r5, r7\n\t"
        "ldrd	r12, lr, [%[sha512], #40]\n\t"
        "ldrd	r6, r7, [sp, #80]\n\t"
        "adds	r12, r12, r4\n\t"
        "adc	lr, lr, r5\n\t"
        "ldrd	r4, r5, [r3, #80]\n\t"
        "adds	r12, r12, r6\n\t"
        "adc	lr, lr, r7\n\t"
        "ldrd	r6, r7, [%[sha512], #8]\n\t"
        "adds	r12, r12, r4\n\t"
        "adc	lr, lr, r5\n\t"
        "strd	r12, lr, [%[sha512], #40]\n\t"
        "adds	r6, r6, r12\n\t"
        "adc	r7, r7, lr\n\t"
        "ldrd	r12, lr, [%[sha512], #48]\n\t"
        "strd	r6, r7, [%[sha512], #8]\n\t"
        "lsrs	r4, r12, #28\n\t"
        "lsrs	r5, lr, #28\n\t"
        "orr	r5, r5, r12, lsl 4\n\t"
        "orr	r4, r4, lr, lsl 4\n\t"
        "lsls	r6, r12, #30\n\t"
        "lsls	r7, lr, #30\n\t"
        "orr	r7, r7, r12, lsr 2\n\t"
        "orr	r6, r6, lr, lsr 2\n\t"
        "eor	r4, r4, r6\n\t"
        "eor	r5, r5, r7\n\t"
        "lsls	r6, r12, #25\n\t"
        "lsls	r7, lr, #25\n\t"
        "orr	r7, r7, r12, lsr 7\n\t"
        "orr	r6, r6, lr, lsr 7\n\t"
        "ldrd	r12, lr, [%[sha512], #40]\n\t"
        "eor	r4, r4, r6\n\t"
        "eor	r5, r5, r7\n\t"
        "adds	r12, r12, r4\n\t"
        "adc	lr, lr, r5\n\t"
        "ldrd	r6, r7, [%[sha512], #48]\n\t"
        "ldrd	r4, r5, [%[sha512], #56]\n\t"
        "strd	r12, lr, [%[sha512], #40]\n\t"
        "eor	r6, r6, r4\n\t"
        "eor	r7, r7, r5\n\t"
        "and	r8, r8, r6\n\t"
        "and	r9, r9, r7\n\t"
        "eor	r8, r8, r4\n\t"
        "eor	r9, r9, r5\n\t"
        "ldrd	r4, r5, [%[sha512], #40]\n\t"
        "adds	r4, r4, r8\n\t"
        "adc	r5, r5, r9\n\t"
        "strd	r4, r5, [%[sha512], #40]\n\t"
        "mov	r8, r6\n\t"
        "mov	r9, r7\n\t"
        /* Calc new W[10] */
        "ldrd	r12, lr, [sp, #64]\n\t"
        "lsrs	r4, r12, #19\n\t"
        "lsrs	r5, lr, #19\n\t"
        "orr	r5, r5, r12, lsl 13\n\t"
        "orr	r4, r4, lr, lsl 13\n\t"
        "lsls	r6, r12, #3\n\t"
        "lsls	r7, lr, #3\n\t"
        "orr	r7, r7, r12, lsr 29\n\t"
        "orr	r6, r6, lr, lsr 29\n\t"
        "eor	r5, r5, r7\n\t"
        "eor	r4, r4, r6\n\t"
        "lsrs	r6, r12, #6\n\t"
        "lsrs	r7, lr, #6\n\t"
        "orr	r6, r6, lr, lsl 26\n\t"
        "eor	r5, r5, r7\n\t"
        "eor	r4, r4, r6\n\t"
        "ldrd	r12, lr, [sp, #80]\n\t"
        "ldrd	r6, r7, [sp, #24]\n\t"
        "adds	r12, r12, r4\n\t"
        "adc	lr, lr, r5\n\t"
        "adds	r12, r12, r6\n\t"
        "adc	lr, lr, r7\n\t"
        "strd	r12, lr, [sp, #80]\n\t"
        "ldrd	r12, lr, [sp, #88]\n\t"
        "lsrs	r4, r12, #1\n\t"
        "lsrs	r5, lr, #1\n\t"
        "orr	r5, r5, r12, lsl 31\n\t"
        "orr	r4, r4, lr, lsl 31\n\t"
        "lsrs	r6, r12, #8\n\t"
        "lsrs	r7, lr, #8\n\t"
        "orr	r7, r7, r12, lsl 24\n\t"
        "orr	r6, r6, lr, lsl 24\n\t"
        "eor	r5, r5, r7\n\t"
        "eor	r4, r4, r6\n\t"
        "lsrs	r6, r12, #7\n\t"
        "lsrs	r7, lr, #7\n\t"
        "orr	r6, r6, lr, lsl 25\n\t"
        "eor	r5, r5, r7\n\t"
        "eor	r4, r4, r6\n\t"
        "ldrd	r12, lr, [sp, #80]\n\t"
        "adds	r12, r12, r4\n\t"
        "adc	lr, lr, r5\n\t"
        "strd	r12, lr, [sp, #80]\n\t"
        /* Round 11 */
        "ldrd	r12, lr, [%[sha512], #8]\n\t"
        "lsrs	r4, r12, #14\n\t"
        "lsrs	r5, lr, #14\n\t"
        "orr	r5, r5, r12, lsl 18\n\t"
        "orr	r4, r4, lr, lsl 18\n\t"
        "lsrs	r6, r12, #18\n\t"
        "lsrs	r7, lr, #18\n\t"
        "orr	r7, r7, r12, lsl 14\n\t"
        "orr	r6, r6, lr, lsl 14\n\t"
        "eor	r4, r4, r6\n\t"
        "eor	r5, r5, r7\n\t"
        "lsls	r6, r12, #23\n\t"
        "lsls	r7, lr, #23\n\t"
        "orr	r7, r7, r12, lsr 9\n\t"
        "orr	r6, r6, lr, lsr 9\n\t"
        "ldrd	r12, lr, [%[sha512], #32]\n\t"
        "eor	r4, r4, r6\n\t"
        "eor	r5, r5, r7\n\t"
        "adds	r12, r12, r4\n\t"
        "adc	lr, lr, r5\n\t"
        "strd	r12, lr, [%[sha512], #32]\n\t"
        "ldrd	r12, lr, [%[sha512], #8]\n\t"
        "ldrd	r4, r5, [%[sha512], #16]\n\t"
        "ldrd	r6, r7, [%[sha512], #24]\n\t"
        "eor	r4, r4, r6\n\t"
        "eor	r5, r5, r7\n\t"
        "and	r4, r4, r12\n\t"
        "and	r5, r5, lr\n\t"
        "eor	r4, r4, r6\n\t"
        "eor	r5, r5, r7\n\t"
        "ldrd	r12, lr, [%[sha512], #32]\n\t"
        "ldrd	r6, r7, [sp, #88]\n\t"
        "adds	r12, r12, r4\n\t"
        "adc	lr, lr, r5\n\t"
        "ldrd	r4, r5, [r3, #88]\n\t"
        "adds	r12, r12, r6\n\t"
        "adc	lr, lr, r7\n\t"
        "ldrd	r6, r7, [%[sha512]]\n\t"
        "adds	r12, r12, r4\n\t"
        "adc	lr, lr, r5\n\t"
        "strd	r12, lr, [%[sha512], #32]\n\t"
        "adds	r6, r6, r12\n\t"
        "adc	r7, r7, lr\n\t"
        "ldrd	r12, lr, [%[sha512], #40]\n\t"
        "strd	r6, r7, [%[sha512]]\n\t"
        "lsrs	r4, r12, #28\n\t"
        "lsrs	r5, lr, #28\n\t"
        "orr	r5, r5, r12, lsl 4\n\t"
        "orr	r4, r4, lr, lsl 4\n\t"
        "lsls	r6, r12, #30\n\t"
        "lsls	r7, lr, #30\n\t"
        "orr	r7, r7, r12, lsr 2\n\t"
        "orr	r6, r6, lr, lsr 2\n\t"
        "eor	r4, r4, r6\n\t"
        "eor	r5, r5, r7\n\t"
        "lsls	r6, r12, #25\n\t"
        "lsls	r7, lr, #25\n\t"
        "orr	r7, r7, r12, lsr 7\n\t"
        "orr	r6, r6, lr, lsr 7\n\t"
        "ldrd	r12, lr, [%[sha512], #32]\n\t"
        "eor	r4, r4, r6\n\t"
        "eor	r5, r5, r7\n\t"
        "adds	r12, r12, r4\n\t"
        "adc	lr, lr, r5\n\t"
        "ldrd	r6, r7, [%[sha512], #40]\n\t"
        "ldrd	r4, r5, [%[sha512], #48]\n\t"
        "strd	r12, lr, [%[sha512], #32]\n\t"
        "eor	r6, r6, r4\n\t"
        "eor	r7, r7, r5\n\t"
        "and	r8, r8, r6\n\t"
        "and	r9, r9, r7\n\t"
        "eor	r8, r8, r4\n\t"
        "eor	r9, r9, r5\n\t"
        "ldrd	r4, r5, [%[sha512], #32]\n\t"
        "adds	r4, r4, r8\n\t"
        "adc	r5, r5, r9\n\t"
        "strd	r4, r5, [%[sha512], #32]\n\t"
        "mov	r8, r6\n\t"
        "mov	r9, r7\n\t"
        /* Calc new W[11] */
        "ldrd	r12, lr, [sp, #72]\n\t"
        "lsrs	r4, r12, #19\n\t"
        "lsrs	r5, lr, #19\n\t"
        "orr	r5, r5, r12, lsl 13\n\t"
        "orr	r4, r4, lr, lsl 13\n\t"
        "lsls	r6, r12, #3\n\t"
        "lsls	r7, lr, #3\n\t"
        "orr	r7, r7, r12, lsr 29\n\t"
        "orr	r6, r6, lr, lsr 29\n\t"
        "eor	r5, r5, r7\n\t"
        "eor	r4, r4, r6\n\t"
        "lsrs	r6, r12, #6\n\t"
        "lsrs	r7, lr, #6\n\t"
        "orr	r6, r6, lr, lsl 26\n\t"
        "eor	r5, r5, r7\n\t"
        "eor	r4, r4, r6\n\t"
        "ldrd	r12, lr, [sp, #88]\n\t"
        "ldrd	r6, r7, [sp, #32]\n\t"
        "adds	r12, r12, r4\n\t"
        "adc	lr, lr, r5\n\t"
        "adds	r12, r12, r6\n\t"
        "adc	lr, lr, r7\n\t"
        "strd	r12, lr, [sp, #88]\n\t"
        "ldrd	r12, lr, [sp, #96]\n\t"
        "lsrs	r4, r12, #1\n\t"
        "lsrs	r5, lr, #1\n\t"
        "orr	r5, r5, r12, lsl 31\n\t"
        "orr	r4, r4, lr, lsl 31\n\t"
        "lsrs	r6, r12, #8\n\t"
        "lsrs	r7, lr, #8\n\t"
        "orr	r7, r7, r12, lsl 24\n\t"
        "orr	r6, r6, lr, lsl 24\n\t"
        "eor	r5, r5, r7\n\t"
        "eor	r4, r4, r6\n\t"
        "lsrs	r6, r12, #7\n\t"
        "lsrs	r7, lr, #7\n\t"
        "orr	r6, r6, lr, lsl 25\n\t"
        "eor	r5, r5, r7\n\t"
        "eor	r4, r4, r6\n\t"
        "ldrd	r12, lr, [sp, #88]\n\t"
        "adds	r12, r12, r4\n\t"
        "adc	lr, lr, r5\n\t"
        "strd	r12, lr, [sp, #88]\n\t"
        /* Round 12 */
        "ldrd	r12, lr, [%[sha512]]\n\t"
        "lsrs	r4, r12, #14\n\t"
        "lsrs	r5, lr, #14\n\t"
        "orr	r5, r5, r12, lsl 18\n\t"
        "orr	r4, r4, lr, lsl 18\n\t"
        "lsrs	r6, r12, #18\n\t"
        "lsrs	r7, lr, #18\n\t"
        "orr	r7, r7, r12, lsl 14\n\t"
        "orr	r6, r6, lr, lsl 14\n\t"
        "eor	r4, r4, r6\n\t"
        "eor	r5, r5, r7\n\t"
        "lsls	r6, r12, #23\n\t"
        "lsls	r7, lr, #23\n\t"
        "orr	r7, r7, r12, lsr 9\n\t"
        "orr	r6, r6, lr, lsr 9\n\t"
        "ldrd	r12, lr, [%[sha512], #24]\n\t"
        "eor	r4, r4, r6\n\t"
        "eor	r5, r5, r7\n\t"
        "adds	r12, r12, r4\n\t"
        "adc	lr, lr, r5\n\t"
        "strd	r12, lr, [%[sha512], #24]\n\t"
        "ldrd	r12, lr, [%[sha512]]\n\t"
        "ldrd	r4, r5, [%[sha512], #8]\n\t"
        "ldrd	r6, r7, [%[sha512], #16]\n\t"
        "eor	r4, r4, r6\n\t"
        "eor	r5, r5, r7\n\t"
        "and	r4, r4, r12\n\t"
        "and	r5, r5, lr\n\t"
        "eor	r4, r4, r6\n\t"
        "eor	r5, r5, r7\n\t"
        "ldrd	r12, lr, [%[sha512], #24]\n\t"
        "ldrd	r6, r7, [sp, #96]\n\t"
        "adds	r12, r12, r4\n\t"
        "adc	lr, lr, r5\n\t"
        "ldrd	r4, r5, [r3, #96]\n\t"
        "adds	r12, r12, r6\n\t"
        "adc	lr, lr, r7\n\t"
        "ldrd	r6, r7, [%[sha512], #56]\n\t"
        "adds	r12, r12, r4\n\t"
        "adc	lr, lr, r5\n\t"
        "strd	r12, lr, [%[sha512], #24]\n\t"
        "adds	r6, r6, r12\n\t"
        "adc	r7, r7, lr\n\t"
        "ldrd	r12, lr, [%[sha512], #32]\n\t"
        "strd	r6, r7, [%[sha512], #56]\n\t"
        "lsrs	r4, r12, #28\n\t"
        "lsrs	r5, lr, #28\n\t"
        "orr	r5, r5, r12, lsl 4\n\t"
        "orr	r4, r4, lr, lsl 4\n\t"
        "lsls	r6, r12, #30\n\t"
        "lsls	r7, lr, #30\n\t"
        "orr	r7, r7, r12, lsr 2\n\t"
        "orr	r6, r6, lr, lsr 2\n\t"
        "eor	r4, r4, r6\n\t"
        "eor	r5, r5, r7\n\t"
        "lsls	r6, r12, #25\n\t"
        "lsls	r7, lr, #25\n\t"
        "orr	r7, r7, r12, lsr 7\n\t"
        "orr	r6, r6, lr, lsr 7\n\t"
        "ldrd	r12, lr, [%[sha512], #24]\n\t"
        "eor	r4, r4, r6\n\t"
        "eor	r5, r5, r7\n\t"
        "adds	r12, r12, r4\n\t"
        "adc	lr, lr, r5\n\t"
        "ldrd	r6, r7, [%[sha512], #32]\n\t"
        "ldrd	r4, r5, [%[sha512], #40]\n\t"
        "strd	r12, lr, [%[sha512], #24]\n\t"
        "eor	r6, r6, r4\n\t"
        "eor	r7, r7, r5\n\t"
        "and	r8, r8, r6\n\t"
        "and	r9, r9, r7\n\t"
        "eor	r8, r8, r4\n\t"
        "eor	r9, r9, r5\n\t"
        "ldrd	r4, r5, [%[sha512], #24]\n\t"
        "adds	r4, r4, r8\n\t"
        "adc	r5, r5, r9\n\t"
        "strd	r4, r5, [%[sha512], #24]\n\t"
        "mov	r8, r6\n\t"
        "mov	r9, r7\n\t"
        /* Calc new W[12] */
        "ldrd	r12, lr, [sp, #80]\n\t"
        "lsrs	r4, r12, #19\n\t"
        "lsrs	r5, lr, #19\n\t"
        "orr	r5, r5, r12, lsl 13\n\t"
        "orr	r4, r4, lr, lsl 13\n\t"
        "lsls	r6, r12, #3\n\t"
        "lsls	r7, lr, #3\n\t"
        "orr	r7, r7, r12, lsr 29\n\t"
        "orr	r6, r6, lr, lsr 29\n\t"
        "eor	r5, r5, r7\n\t"
        "eor	r4, r4, r6\n\t"
        "lsrs	r6, r12, #6\n\t"
        "lsrs	r7, lr, #6\n\t"
        "orr	r6, r6, lr, lsl 26\n\t"
        "eor	r5, r5, r7\n\t"
        "eor	r4, r4, r6\n\t"
        "ldrd	r12, lr, [sp, #96]\n\t"
        "ldrd	r6, r7, [sp, #40]\n\t"
        "adds	r12, r12, r4\n\t"
        "adc	lr, lr, r5\n\t"
        "adds	r12, r12, r6\n\t"
        "adc	lr, lr, r7\n\t"
        "strd	r12, lr, [sp, #96]\n\t"
        "ldrd	r12, lr, [sp, #104]\n\t"
        "lsrs	r4, r12, #1\n\t"
        "lsrs	r5, lr, #1\n\t"
        "orr	r5, r5, r12, lsl 31\n\t"
        "orr	r4, r4, lr, lsl 31\n\t"
        "lsrs	r6, r12, #8\n\t"
        "lsrs	r7, lr, #8\n\t"
        "orr	r7, r7, r12, lsl 24\n\t"
        "orr	r6, r6, lr, lsl 24\n\t"
        "eor	r5, r5, r7\n\t"
        "eor	r4, r4, r6\n\t"
        "lsrs	r6, r12, #7\n\t"
        "lsrs	r7, lr, #7\n\t"
        "orr	r6, r6, lr, lsl 25\n\t"
        "eor	r5, r5, r7\n\t"
        "eor	r4, r4, r6\n\t"
        "ldrd	r12, lr, [sp, #96]\n\t"
        "adds	r12, r12, r4\n\t"
        "adc	lr, lr, r5\n\t"
        "strd	r12, lr, [sp, #96]\n\t"
        /* Round 13 */
        "ldrd	r12, lr, [%[sha512], #56]\n\t"
        "lsrs	r4, r12, #14\n\t"
        "lsrs	r5, lr, #14\n\t"
        "orr	r5, r5, r12, lsl 18\n\t"
        "orr	r4, r4, lr, lsl 18\n\t"
        "lsrs	r6, r12, #18\n\t"
        "lsrs	r7, lr, #18\n\t"
        "orr	r7, r7, r12, lsl 14\n\t"
        "orr	r6, r6, lr, lsl 14\n\t"
        "eor	r4, r4, r6\n\t"
        "eor	r5, r5, r7\n\t"
        "lsls	r6, r12, #23\n\t"
        "lsls	r7, lr, #23\n\t"
        "orr	r7, r7, r12, lsr 9\n\t"
        "orr	r6, r6, lr, lsr 9\n\t"
        "ldrd	r12, lr, [%[sha512], #16]\n\t"
        "eor	r4, r4, r6\n\t"
        "eor	r5, r5, r7\n\t"
        "adds	r12, r12, r4\n\t"
        "adc	lr, lr, r5\n\t"
        "strd	r12, lr, [%[sha512], #16]\n\t"
        "ldrd	r12, lr, [%[sha512], #56]\n\t"
        "ldrd	r4, r5, [%[sha512]]\n\t"
        "ldrd	r6, r7, [%[sha512], #8]\n\t"
        "eor	r4, r4, r6\n\t"
        "eor	r5, r5, r7\n\t"
        "and	r4, r4, r12\n\t"
        "and	r5, r5, lr\n\t"
        "eor	r4, r4, r6\n\t"
        "eor	r5, r5, r7\n\t"
        "ldrd	r12, lr, [%[sha512], #16]\n\t"
        "ldrd	r6, r7, [sp, #104]\n\t"
        "adds	r12, r12, r4\n\t"
        "adc	lr, lr, r5\n\t"
        "ldrd	r4, r5, [r3, #104]\n\t"
        "adds	r12, r12, r6\n\t"
        "adc	lr, lr, r7\n\t"
        "ldrd	r6, r7, [%[sha512], #48]\n\t"
        "adds	r12, r12, r4\n\t"
        "adc	lr, lr, r5\n\t"
        "strd	r12, lr, [%[sha512], #16]\n\t"
        "adds	r6, r6, r12\n\t"
        "adc	r7, r7, lr\n\t"
        "ldrd	r12, lr, [%[sha512], #24]\n\t"
        "strd	r6, r7, [%[sha512], #48]\n\t"
        "lsrs	r4, r12, #28\n\t"
        "lsrs	r5, lr, #28\n\t"
        "orr	r5, r5, r12, lsl 4\n\t"
        "orr	r4, r4, lr, lsl 4\n\t"
        "lsls	r6, r12, #30\n\t"
        "lsls	r7, lr, #30\n\t"
        "orr	r7, r7, r12, lsr 2\n\t"
        "orr	r6, r6, lr, lsr 2\n\t"
        "eor	r4, r4, r6\n\t"
        "eor	r5, r5, r7\n\t"
        "lsls	r6, r12, #25\n\t"
        "lsls	r7, lr, #25\n\t"
        "orr	r7, r7, r12, lsr 7\n\t"
        "orr	r6, r6, lr, lsr 7\n\t"
        "ldrd	r12, lr, [%[sha512], #16]\n\t"
        "eor	r4, r4, r6\n\t"
        "eor	r5, r5, r7\n\t"
        "adds	r12, r12, r4\n\t"
        "adc	lr, lr, r5\n\t"
        "ldrd	r6, r7, [%[sha512], #24]\n\t"
        "ldrd	r4, r5, [%[sha512], #32]\n\t"
        "strd	r12, lr, [%[sha512], #16]\n\t"
        "eor	r6, r6, r4\n\t"
        "eor	r7, r7, r5\n\t"
        "and	r8, r8, r6\n\t"
        "and	r9, r9, r7\n\t"
        "eor	r8, r8, r4\n\t"
        "eor	r9, r9, r5\n\t"
        "ldrd	r4, r5, [%[sha512], #16]\n\t"
        "adds	r4, r4, r8\n\t"
        "adc	r5, r5, r9\n\t"
        "strd	r4, r5, [%[sha512], #16]\n\t"
        "mov	r8, r6\n\t"
        "mov	r9, r7\n\t"
        /* Calc new W[13] */
        "ldrd	r12, lr, [sp, #88]\n\t"
        "lsrs	r4, r12, #19\n\t"
        "lsrs	r5, lr, #19\n\t"
        "orr	r5, r5, r12, lsl 13\n\t"
        "orr	r4, r4, lr, lsl 13\n\t"
        "lsls	r6, r12, #3\n\t"
        "lsls	r7, lr, #3\n\t"
        "orr	r7, r7, r12, lsr 29\n\t"
        "orr	r6, r6, lr, lsr 29\n\t"
        "eor	r5, r5, r7\n\t"
        "eor	r4, r4, r6\n\t"
        "lsrs	r6, r12, #6\n\t"
        "lsrs	r7, lr, #6\n\t"
        "orr	r6, r6, lr, lsl 26\n\t"
        "eor	r5, r5, r7\n\t"
        "eor	r4, r4, r6\n\t"
        "ldrd	r12, lr, [sp, #104]\n\t"
        "ldrd	r6, r7, [sp, #48]\n\t"
        "adds	r12, r12, r4\n\t"
        "adc	lr, lr, r5\n\t"
        "adds	r12, r12, r6\n\t"
        "adc	lr, lr, r7\n\t"
        "strd	r12, lr, [sp, #104]\n\t"
        "ldrd	r12, lr, [sp, #112]\n\t"
        "lsrs	r4, r12, #1\n\t"
        "lsrs	r5, lr, #1\n\t"
        "orr	r5, r5, r12, lsl 31\n\t"
        "orr	r4, r4, lr, lsl 31\n\t"
        "lsrs	r6, r12, #8\n\t"
        "lsrs	r7, lr, #8\n\t"
        "orr	r7, r7, r12, lsl 24\n\t"
        "orr	r6, r6, lr, lsl 24\n\t"
        "eor	r5, r5, r7\n\t"
        "eor	r4, r4, r6\n\t"
        "lsrs	r6, r12, #7\n\t"
        "lsrs	r7, lr, #7\n\t"
        "orr	r6, r6, lr, lsl 25\n\t"
        "eor	r5, r5, r7\n\t"
        "eor	r4, r4, r6\n\t"
        "ldrd	r12, lr, [sp, #104]\n\t"
        "adds	r12, r12, r4\n\t"
        "adc	lr, lr, r5\n\t"
        "strd	r12, lr, [sp, #104]\n\t"
        /* Round 14 */
        "ldrd	r12, lr, [%[sha512], #48]\n\t"
        "lsrs	r4, r12, #14\n\t"
        "lsrs	r5, lr, #14\n\t"
        "orr	r5, r5, r12, lsl 18\n\t"
        "orr	r4, r4, lr, lsl 18\n\t"
        "lsrs	r6, r12, #18\n\t"
        "lsrs	r7, lr, #18\n\t"
        "orr	r7, r7, r12, lsl 14\n\t"
        "orr	r6, r6, lr, lsl 14\n\t"
        "eor	r4, r4, r6\n\t"
        "eor	r5, r5, r7\n\t"
        "lsls	r6, r12, #23\n\t"
        "lsls	r7, lr, #23\n\t"
        "orr	r7, r7, r12, lsr 9\n\t"
        "orr	r6, r6, lr, lsr 9\n\t"
        "ldrd	r12, lr, [%[sha512], #8]\n\t"
        "eor	r4, r4, r6\n\t"
        "eor	r5, r5, r7\n\t"
        "adds	r12, r12, r4\n\t"
        "adc	lr, lr, r5\n\t"
        "strd	r12, lr, [%[sha512], #8]\n\t"
        "ldrd	r12, lr, [%[sha512], #48]\n\t"
        "ldrd	r4, r5, [%[sha512], #56]\n\t"
        "ldrd	r6, r7, [%[sha512]]\n\t"
        "eor	r4, r4, r6\n\t"
        "eor	r5, r5, r7\n\t"
        "and	r4, r4, r12\n\t"
        "and	r5, r5, lr\n\t"
        "eor	r4, r4, r6\n\t"
        "eor	r5, r5, r7\n\t"
        "ldrd	r12, lr, [%[sha512], #8]\n\t"
        "ldrd	r6, r7, [sp, #112]\n\t"
        "adds	r12, r12, r4\n\t"
        "adc	lr, lr, r5\n\t"
        "ldrd	r4, r5, [r3, #112]\n\t"
        "adds	r12, r12, r6\n\t"
        "adc	lr, lr, r7\n\t"
        "ldrd	r6, r7, [%[sha512], #40]\n\t"
        "adds	r12, r12, r4\n\t"
        "adc	lr, lr, r5\n\t"
        "strd	r12, lr, [%[sha512], #8]\n\t"
        "adds	r6, r6, r12\n\t"
        "adc	r7, r7, lr\n\t"
        "ldrd	r12, lr, [%[sha512], #16]\n\t"
        "strd	r6, r7, [%[sha512], #40]\n\t"
        "lsrs	r4, r12, #28\n\t"
        "lsrs	r5, lr, #28\n\t"
        "orr	r5, r5, r12, lsl 4\n\t"
        "orr	r4, r4, lr, lsl 4\n\t"
        "lsls	r6, r12, #30\n\t"
        "lsls	r7, lr, #30\n\t"
        "orr	r7, r7, r12, lsr 2\n\t"
        "orr	r6, r6, lr, lsr 2\n\t"
        "eor	r4, r4, r6\n\t"
        "eor	r5, r5, r7\n\t"
        "lsls	r6, r12, #25\n\t"
        "lsls	r7, lr, #25\n\t"
        "orr	r7, r7, r12, lsr 7\n\t"
        "orr	r6, r6, lr, lsr 7\n\t"
        "ldrd	r12, lr, [%[sha512], #8]\n\t"
        "eor	r4, r4, r6\n\t"
        "eor	r5, r5, r7\n\t"
        "adds	r12, r12, r4\n\t"
        "adc	lr, lr, r5\n\t"
        "ldrd	r6, r7, [%[sha512], #16]\n\t"
        "ldrd	r4, r5, [%[sha512], #24]\n\t"
        "strd	r12, lr, [%[sha512], #8]\n\t"
        "eor	r6, r6, r4\n\t"
        "eor	r7, r7, r5\n\t"
        "and	r8, r8, r6\n\t"
        "and	r9, r9, r7\n\t"
        "eor	r8, r8, r4\n\t"
        "eor	r9, r9, r5\n\t"
        "ldrd	r4, r5, [%[sha512], #8]\n\t"
        "adds	r4, r4, r8\n\t"
        "adc	r5, r5, r9\n\t"
        "strd	r4, r5, [%[sha512], #8]\n\t"
        "mov	r8, r6\n\t"
        "mov	r9, r7\n\t"
        /* Calc new W[14] */
        "ldrd	r12, lr, [sp, #96]\n\t"
        "lsrs	r4, r12, #19\n\t"
        "lsrs	r5, lr, #19\n\t"
        "orr	r5, r5, r12, lsl 13\n\t"
        "orr	r4, r4, lr, lsl 13\n\t"
        "lsls	r6, r12, #3\n\t"
        "lsls	r7, lr, #3\n\t"
        "orr	r7, r7, r12, lsr 29\n\t"
        "orr	r6, r6, lr, lsr 29\n\t"
        "eor	r5, r5, r7\n\t"
        "eor	r4, r4, r6\n\t"
        "lsrs	r6, r12, #6\n\t"
        "lsrs	r7, lr, #6\n\t"
        "orr	r6, r6, lr, lsl 26\n\t"
        "eor	r5, r5, r7\n\t"
        "eor	r4, r4, r6\n\t"
        "ldrd	r12, lr, [sp, #112]\n\t"
        "ldrd	r6, r7, [sp, #56]\n\t"
        "adds	r12, r12, r4\n\t"
        "adc	lr, lr, r5\n\t"
        "adds	r12, r12, r6\n\t"
        "adc	lr, lr, r7\n\t"
        "strd	r12, lr, [sp, #112]\n\t"
        "ldrd	r12, lr, [sp, #120]\n\t"
        "lsrs	r4, r12, #1\n\t"
        "lsrs	r5, lr, #1\n\t"
        "orr	r5, r5, r12, lsl 31\n\t"
        "orr	r4, r4, lr, lsl 31\n\t"
        "lsrs	r6, r12, #8\n\t"
        "lsrs	r7, lr, #8\n\t"
        "orr	r7, r7, r12, lsl 24\n\t"
        "orr	r6, r6, lr, lsl 24\n\t"
        "eor	r5, r5, r7\n\t"
        "eor	r4, r4, r6\n\t"
        "lsrs	r6, r12, #7\n\t"
        "lsrs	r7, lr, #7\n\t"
        "orr	r6, r6, lr, lsl 25\n\t"
        "eor	r5, r5, r7\n\t"
        "eor	r4, r4, r6\n\t"
        "ldrd	r12, lr, [sp, #112]\n\t"
        "adds	r12, r12, r4\n\t"
        "adc	lr, lr, r5\n\t"
        "strd	r12, lr, [sp, #112]\n\t"
        /* Round 15 */
        "ldrd	r12, lr, [%[sha512], #40]\n\t"
        "lsrs	r4, r12, #14\n\t"
        "lsrs	r5, lr, #14\n\t"
        "orr	r5, r5, r12, lsl 18\n\t"
        "orr	r4, r4, lr, lsl 18\n\t"
        "lsrs	r6, r12, #18\n\t"
        "lsrs	r7, lr, #18\n\t"
        "orr	r7, r7, r12, lsl 14\n\t"
        "orr	r6, r6, lr, lsl 14\n\t"
        "eor	r4, r4, r6\n\t"
        "eor	r5, r5, r7\n\t"
        "lsls	r6, r12, #23\n\t"
        "lsls	r7, lr, #23\n\t"
        "orr	r7, r7, r12, lsr 9\n\t"
        "orr	r6, r6, lr, lsr 9\n\t"
        "ldrd	r12, lr, [%[sha512]]\n\t"
        "eor	r4, r4, r6\n\t"
        "eor	r5, r5, r7\n\t"
        "adds	r12, r12, r4\n\t"
        "adc	lr, lr, r5\n\t"
        "strd	r12, lr, [%[sha512]]\n\t"
        "ldrd	r12, lr, [%[sha512], #40]\n\t"
        "ldrd	r4, r5, [%[sha512], #48]\n\t"
        "ldrd	r6, r7, [%[sha512], #56]\n\t"
        "eor	r4, r4, r6\n\t"
        "eor	r5, r5, r7\n\t"
        "and	r4, r4, r12\n\t"
        "and	r5, r5, lr\n\t"
        "eor	r4, r4, r6\n\t"
        "eor	r5, r5, r7\n\t"
        "ldrd	r12, lr, [%[sha512]]\n\t"
        "ldrd	r6, r7, [sp, #120]\n\t"
        "adds	r12, r12, r4\n\t"
        "adc	lr, lr, r5\n\t"
        "ldrd	r4, r5, [r3, #120]\n\t"
        "adds	r12, r12, r6\n\t"
        "adc	lr, lr, r7\n\t"
        "ldrd	r6, r7, [%[sha512], #32]\n\t"
        "adds	r12, r12, r4\n\t"
        "adc	lr, lr, r5\n\t"
        "strd	r12, lr, [%[sha512]]\n\t"
        "adds	r6, r6, r12\n\t"
        "adc	r7, r7, lr\n\t"
        "ldrd	r12, lr, [%[sha512], #8]\n\t"
        "strd	r6, r7, [%[sha512], #32]\n\t"
        "lsrs	r4, r12, #28\n\t"
        "lsrs	r5, lr, #28\n\t"
        "orr	r5, r5, r12, lsl 4\n\t"
        "orr	r4, r4, lr, lsl 4\n\t"
        "lsls	r6, r12, #30\n\t"
        "lsls	r7, lr, #30\n\t"
        "orr	r7, r7, r12, lsr 2\n\t"
        "orr	r6, r6, lr, lsr 2\n\t"
        "eor	r4, r4, r6\n\t"
        "eor	r5, r5, r7\n\t"
        "lsls	r6, r12, #25\n\t"
        "lsls	r7, lr, #25\n\t"
        "orr	r7, r7, r12, lsr 7\n\t"
        "orr	r6, r6, lr, lsr 7\n\t"
        "ldrd	r12, lr, [%[sha512]]\n\t"
        "eor	r4, r4, r6\n\t"
        "eor	r5, r5, r7\n\t"
        "adds	r12, r12, r4\n\t"
        "adc	lr, lr, r5\n\t"
        "ldrd	r6, r7, [%[sha512], #8]\n\t"
        "ldrd	r4, r5, [%[sha512], #16]\n\t"
        "strd	r12, lr, [%[sha512]]\n\t"
        "eor	r6, r6, r4\n\t"
        "eor	r7, r7, r5\n\t"
        "and	r8, r8, r6\n\t"
        "and	r9, r9, r7\n\t"
        "eor	r8, r8, r4\n\t"
        "eor	r9, r9, r5\n\t"
        "ldrd	r4, r5, [%[sha512]]\n\t"
        "adds	r4, r4, r8\n\t"
        "adc	r5, r5, r9\n\t"
        "strd	r4, r5, [%[sha512]]\n\t"
        "mov	r8, r6\n\t"
        "mov	r9, r7\n\t"
        /* Calc new W[15] */
        "ldrd	r12, lr, [sp, #104]\n\t"
        "lsrs	r4, r12, #19\n\t"
        "lsrs	r5, lr, #19\n\t"
        "orr	r5, r5, r12, lsl 13\n\t"
        "orr	r4, r4, lr, lsl 13\n\t"
        "lsls	r6, r12, #3\n\t"
        "lsls	r7, lr, #3\n\t"
        "orr	r7, r7, r12, lsr 29\n\t"
        "orr	r6, r6, lr, lsr 29\n\t"
        "eor	r5, r5, r7\n\t"
        "eor	r4, r4, r6\n\t"
        "lsrs	r6, r12, #6\n\t"
        "lsrs	r7, lr, #6\n\t"
        "orr	r6, r6, lr, lsl 26\n\t"
        "eor	r5, r5, r7\n\t"
        "eor	r4, r4, r6\n\t"
        "ldrd	r12, lr, [sp, #120]\n\t"
        "ldrd	r6, r7, [sp, #64]\n\t"
        "adds	r12, r12, r4\n\t"
        "adc	lr, lr, r5\n\t"
        "adds	r12, r12, r6\n\t"
        "adc	lr, lr, r7\n\t"
        "strd	r12, lr, [sp, #120]\n\t"
        "ldrd	r12, lr, [sp]\n\t"
        "lsrs	r4, r12, #1\n\t"
        "lsrs	r5, lr, #1\n\t"
        "orr	r5, r5, r12, lsl 31\n\t"
        "orr	r4, r4, lr, lsl 31\n\t"
        "lsrs	r6, r12, #8\n\t"
        "lsrs	r7, lr, #8\n\t"
        "orr	r7, r7, r12, lsl 24\n\t"
        "orr	r6, r6, lr, lsl 24\n\t"
        "eor	r5, r5, r7\n\t"
        "eor	r4, r4, r6\n\t"
        "lsrs	r6, r12, #7\n\t"
        "lsrs	r7, lr, #7\n\t"
        "orr	r6, r6, lr, lsl 25\n\t"
        "eor	r5, r5, r7\n\t"
        "eor	r4, r4, r6\n\t"
        "ldrd	r12, lr, [sp, #120]\n\t"
        "adds	r12, r12, r4\n\t"
        "adc	lr, lr, r5\n\t"
        "strd	r12, lr, [sp, #120]\n\t"
        "add	r3, r3, #0x80\n\t"
        "subs	r10, r10, #1\n\t"
        "bne	L_sha512_len_neon_start_%=\n\t"
        /* Round 0 */
        "ldrd	r12, lr, [%[sha512], #32]\n\t"
        "lsrs	r4, r12, #14\n\t"
        "lsrs	r5, lr, #14\n\t"
        "orr	r5, r5, r12, lsl 18\n\t"
        "orr	r4, r4, lr, lsl 18\n\t"
        "lsrs	r6, r12, #18\n\t"
        "lsrs	r7, lr, #18\n\t"
        "orr	r7, r7, r12, lsl 14\n\t"
        "orr	r6, r6, lr, lsl 14\n\t"
        "eor	r4, r4, r6\n\t"
        "eor	r5, r5, r7\n\t"
        "lsls	r6, r12, #23\n\t"
        "lsls	r7, lr, #23\n\t"
        "orr	r7, r7, r12, lsr 9\n\t"
        "orr	r6, r6, lr, lsr 9\n\t"
        "ldrd	r12, lr, [%[sha512], #56]\n\t"
        "eor	r4, r4, r6\n\t"
        "eor	r5, r5, r7\n\t"
        "adds	r12, r12, r4\n\t"
        "adc	lr, lr, r5\n\t"
        "strd	r12, lr, [%[sha512], #56]\n\t"
        "ldrd	r12, lr, [%[sha512], #32]\n\t"
        "ldrd	r4, r5, [%[sha512], #40]\n\t"
        "ldrd	r6, r7, [%[sha512], #48]\n\t"
        "eor	r4, r4, r6\n\t"
        "eor	r5, r5, r7\n\t"
        "and	r4, r4, r12\n\t"
        "and	r5, r5, lr\n\t"
        "eor	r4, r4, r6\n\t"
        "eor	r5, r5, r7\n\t"
        "ldrd	r12, lr, [%[sha512], #56]\n\t"
        "ldrd	r6, r7, [sp]\n\t"
        "adds	r12, r12, r4\n\t"
        "adc	lr, lr, r5\n\t"
        "ldrd	r4, r5, [r3]\n\t"
        "adds	r12, r12, r6\n\t"
        "adc	lr, lr, r7\n\t"
        "ldrd	r6, r7, [%[sha512], #24]\n\t"
        "adds	r12, r12, r4\n\t"
        "adc	lr, lr, r5\n\t"
        "strd	r12, lr, [%[sha512], #56]\n\t"
        "adds	r6, r6, r12\n\t"
        "adc	r7, r7, lr\n\t"
        "ldrd	r12, lr, [%[sha512]]\n\t"
        "strd	r6, r7, [%[sha512], #24]\n\t"
        "lsrs	r4, r12, #28\n\t"
        "lsrs	r5, lr, #28\n\t"
        "orr	r5, r5, r12, lsl 4\n\t"
        "orr	r4, r4, lr, lsl 4\n\t"
        "lsls	r6, r12, #30\n\t"
        "lsls	r7, lr, #30\n\t"
        "orr	r7, r7, r12, lsr 2\n\t"
        "orr	r6, r6, lr, lsr 2\n\t"
        "eor	r4, r4, r6\n\t"
        "eor	r5, r5, r7\n\t"
        "lsls	r6, r12, #25\n\t"
        "lsls	r7, lr, #25\n\t"
        "orr	r7, r7, r12, lsr 7\n\t"
        "orr	r6, r6, lr, lsr 7\n\t"
        "ldrd	r12, lr, [%[sha512], #56]\n\t"
        "eor	r4, r4, r6\n\t"
        "eor	r5, r5, r7\n\t"
        "adds	r12, r12, r4\n\t"
        "adc	lr, lr, r5\n\t"
        "ldrd	r6, r7, [%[sha512]]\n\t"
        "ldrd	r4, r5, [%[sha512], #8]\n\t"
        "strd	r12, lr, [%[sha512], #56]\n\t"
        "eor	r6, r6, r4\n\t"
        "eor	r7, r7, r5\n\t"
        "and	r8, r8, r6\n\t"
        "and	r9, r9, r7\n\t"
        "eor	r8, r8, r4\n\t"
        "eor	r9, r9, r5\n\t"
        "ldrd	r4, r5, [%[sha512], #56]\n\t"
        "adds	r4, r4, r8\n\t"
        "adc	r5, r5, r9\n\t"
        "strd	r4, r5, [%[sha512], #56]\n\t"
        "mov	r8, r6\n\t"
        "mov	r9, r7\n\t"
        /* Round 1 */
        "ldrd	r12, lr, [%[sha512], #24]\n\t"
        "lsrs	r4, r12, #14\n\t"
        "lsrs	r5, lr, #14\n\t"
        "orr	r5, r5, r12, lsl 18\n\t"
        "orr	r4, r4, lr, lsl 18\n\t"
        "lsrs	r6, r12, #18\n\t"
        "lsrs	r7, lr, #18\n\t"
        "orr	r7, r7, r12, lsl 14\n\t"
        "orr	r6, r6, lr, lsl 14\n\t"
        "eor	r4, r4, r6\n\t"
        "eor	r5, r5, r7\n\t"
        "lsls	r6, r12, #23\n\t"
        "lsls	r7, lr, #23\n\t"
        "orr	r7, r7, r12, lsr 9\n\t"
        "orr	r6, r6, lr, lsr 9\n\t"
        "ldrd	r12, lr, [%[sha512], #48]\n\t"
        "eor	r4, r4, r6\n\t"
        "eor	r5, r5, r7\n\t"
        "adds	r12, r12, r4\n\t"
        "adc	lr, lr, r5\n\t"
        "strd	r12, lr, [%[sha512], #48]\n\t"
        "ldrd	r12, lr, [%[sha512], #24]\n\t"
        "ldrd	r4, r5, [%[sha512], #32]\n\t"
        "ldrd	r6, r7, [%[sha512], #40]\n\t"
        "eor	r4, r4, r6\n\t"
        "eor	r5, r5, r7\n\t"
        "and	r4, r4, r12\n\t"
        "and	r5, r5, lr\n\t"
        "eor	r4, r4, r6\n\t"
        "eor	r5, r5, r7\n\t"
        "ldrd	r12, lr, [%[sha512], #48]\n\t"
        "ldrd	r6, r7, [sp, #8]\n\t"
        "adds	r12, r12, r4\n\t"
        "adc	lr, lr, r5\n\t"
        "ldrd	r4, r5, [r3, #8]\n\t"
        "adds	r12, r12, r6\n\t"
        "adc	lr, lr, r7\n\t"
        "ldrd	r6, r7, [%[sha512], #16]\n\t"
        "adds	r12, r12, r4\n\t"
        "adc	lr, lr, r5\n\t"
        "strd	r12, lr, [%[sha512], #48]\n\t"
        "adds	r6, r6, r12\n\t"
        "adc	r7, r7, lr\n\t"
        "ldrd	r12, lr, [%[sha512], #56]\n\t"
        "strd	r6, r7, [%[sha512], #16]\n\t"
        "lsrs	r4, r12, #28\n\t"
        "lsrs	r5, lr, #28\n\t"
        "orr	r5, r5, r12, lsl 4\n\t"
        "orr	r4, r4, lr, lsl 4\n\t"
        "lsls	r6, r12, #30\n\t"
        "lsls	r7, lr, #30\n\t"
        "orr	r7, r7, r12, lsr 2\n\t"
        "orr	r6, r6, lr, lsr 2\n\t"
        "eor	r4, r4, r6\n\t"
        "eor	r5, r5, r7\n\t"
        "lsls	r6, r12, #25\n\t"
        "lsls	r7, lr, #25\n\t"
        "orr	r7, r7, r12, lsr 7\n\t"
        "orr	r6, r6, lr, lsr 7\n\t"
        "ldrd	r12, lr, [%[sha512], #48]\n\t"
        "eor	r4, r4, r6\n\t"
        "eor	r5, r5, r7\n\t"
        "adds	r12, r12, r4\n\t"
        "adc	lr, lr, r5\n\t"
        "ldrd	r6, r7, [%[sha512], #56]\n\t"
        "ldrd	r4, r5, [%[sha512]]\n\t"
        "strd	r12, lr, [%[sha512], #48]\n\t"
        "eor	r6, r6, r4\n\t"
        "eor	r7, r7, r5\n\t"
        "and	r8, r8, r6\n\t"
        "and	r9, r9, r7\n\t"
        "eor	r8, r8, r4\n\t"
        "eor	r9, r9, r5\n\t"
        "ldrd	r4, r5, [%[sha512], #48]\n\t"
        "adds	r4, r4, r8\n\t"
        "adc	r5, r5, r9\n\t"
        "strd	r4, r5, [%[sha512], #48]\n\t"
        "mov	r8, r6\n\t"
        "mov	r9, r7\n\t"
        /* Round 2 */
        "ldrd	r12, lr, [%[sha512], #16]\n\t"
        "lsrs	r4, r12, #14\n\t"
        "lsrs	r5, lr, #14\n\t"
        "orr	r5, r5, r12, lsl 18\n\t"
        "orr	r4, r4, lr, lsl 18\n\t"
        "lsrs	r6, r12, #18\n\t"
        "lsrs	r7, lr, #18\n\t"
        "orr	r7, r7, r12, lsl 14\n\t"
        "orr	r6, r6, lr, lsl 14\n\t"
        "eor	r4, r4, r6\n\t"
        "eor	r5, r5, r7\n\t"
        "lsls	r6, r12, #23\n\t"
        "lsls	r7, lr, #23\n\t"
        "orr	r7, r7, r12, lsr 9\n\t"
        "orr	r6, r6, lr, lsr 9\n\t"
        "ldrd	r12, lr, [%[sha512], #40]\n\t"
        "eor	r4, r4, r6\n\t"
        "eor	r5, r5, r7\n\t"
        "adds	r12, r12, r4\n\t"
        "adc	lr, lr, r5\n\t"
        "strd	r12, lr, [%[sha512], #40]\n\t"
        "ldrd	r12, lr, [%[sha512], #16]\n\t"
        "ldrd	r4, r5, [%[sha512], #24]\n\t"
        "ldrd	r6, r7, [%[sha512], #32]\n\t"
        "eor	r4, r4, r6\n\t"
        "eor	r5, r5, r7\n\t"
        "and	r4, r4, r12\n\t"
        "and	r5, r5, lr\n\t"
        "eor	r4, r4, r6\n\t"
        "eor	r5, r5, r7\n\t"
        "ldrd	r12, lr, [%[sha512], #40]\n\t"
        "ldrd	r6, r7, [sp, #16]\n\t"
        "adds	r12, r12, r4\n\t"
        "adc	lr, lr, r5\n\t"
        "ldrd	r4, r5, [r3, #16]\n\t"
        "adds	r12, r12, r6\n\t"
        "adc	lr, lr, r7\n\t"
        "ldrd	r6, r7, [%[sha512], #8]\n\t"
        "adds	r12, r12, r4\n\t"
        "adc	lr, lr, r5\n\t"
        "strd	r12, lr, [%[sha512], #40]\n\t"
        "adds	r6, r6, r12\n\t"
        "adc	r7, r7, lr\n\t"
        "ldrd	r12, lr, [%[sha512], #48]\n\t"
        "strd	r6, r7, [%[sha512], #8]\n\t"
        "lsrs	r4, r12, #28\n\t"
        "lsrs	r5, lr, #28\n\t"
        "orr	r5, r5, r12, lsl 4\n\t"
        "orr	r4, r4, lr, lsl 4\n\t"
        "lsls	r6, r12, #30\n\t"
        "lsls	r7, lr, #30\n\t"
        "orr	r7, r7, r12, lsr 2\n\t"
        "orr	r6, r6, lr, lsr 2\n\t"
        "eor	r4, r4, r6\n\t"
        "eor	r5, r5, r7\n\t"
        "lsls	r6, r12, #25\n\t"
        "lsls	r7, lr, #25\n\t"
        "orr	r7, r7, r12, lsr 7\n\t"
        "orr	r6, r6, lr, lsr 7\n\t"
        "ldrd	r12, lr, [%[sha512], #40]\n\t"
        "eor	r4, r4, r6\n\t"
        "eor	r5, r5, r7\n\t"
        "adds	r12, r12, r4\n\t"
        "adc	lr, lr, r5\n\t"
        "ldrd	r6, r7, [%[sha512], #48]\n\t"
        "ldrd	r4, r5, [%[sha512], #56]\n\t"
        "strd	r12, lr, [%[sha512], #40]\n\t"
        "eor	r6, r6, r4\n\t"
        "eor	r7, r7, r5\n\t"
        "and	r8, r8, r6\n\t"
        "and	r9, r9, r7\n\t"
        "eor	r8, r8, r4\n\t"
        "eor	r9, r9, r5\n\t"
        "ldrd	r4, r5, [%[sha512], #40]\n\t"
        "adds	r4, r4, r8\n\t"
        "adc	r5, r5, r9\n\t"
        "strd	r4, r5, [%[sha512], #40]\n\t"
        "mov	r8, r6\n\t"
        "mov	r9, r7\n\t"
        /* Round 3 */
        "ldrd	r12, lr, [%[sha512], #8]\n\t"
        "lsrs	r4, r12, #14\n\t"
        "lsrs	r5, lr, #14\n\t"
        "orr	r5, r5, r12, lsl 18\n\t"
        "orr	r4, r4, lr, lsl 18\n\t"
        "lsrs	r6, r12, #18\n\t"
        "lsrs	r7, lr, #18\n\t"
        "orr	r7, r7, r12, lsl 14\n\t"
        "orr	r6, r6, lr, lsl 14\n\t"
        "eor	r4, r4, r6\n\t"
        "eor	r5, r5, r7\n\t"
        "lsls	r6, r12, #23\n\t"
        "lsls	r7, lr, #23\n\t"
        "orr	r7, r7, r12, lsr 9\n\t"
        "orr	r6, r6, lr, lsr 9\n\t"
        "ldrd	r12, lr, [%[sha512], #32]\n\t"
        "eor	r4, r4, r6\n\t"
        "eor	r5, r5, r7\n\t"
        "adds	r12, r12, r4\n\t"
        "adc	lr, lr, r5\n\t"
        "strd	r12, lr, [%[sha512], #32]\n\t"
        "ldrd	r12, lr, [%[sha512], #8]\n\t"
        "ldrd	r4, r5, [%[sha512], #16]\n\t"
        "ldrd	r6, r7, [%[sha512], #24]\n\t"
        "eor	r4, r4, r6\n\t"
        "eor	r5, r5, r7\n\t"
        "and	r4, r4, r12\n\t"
        "and	r5, r5, lr\n\t"
        "eor	r4, r4, r6\n\t"
        "eor	r5, r5, r7\n\t"
        "ldrd	r12, lr, [%[sha512], #32]\n\t"
        "ldrd	r6, r7, [sp, #24]\n\t"
        "adds	r12, r12, r4\n\t"
        "adc	lr, lr, r5\n\t"
        "ldrd	r4, r5, [r3, #24]\n\t"
        "adds	r12, r12, r6\n\t"
        "adc	lr, lr, r7\n\t"
        "ldrd	r6, r7, [%[sha512]]\n\t"
        "adds	r12, r12, r4\n\t"
        "adc	lr, lr, r5\n\t"
        "strd	r12, lr, [%[sha512], #32]\n\t"
        "adds	r6, r6, r12\n\t"
        "adc	r7, r7, lr\n\t"
        "ldrd	r12, lr, [%[sha512], #40]\n\t"
        "strd	r6, r7, [%[sha512]]\n\t"
        "lsrs	r4, r12, #28\n\t"
        "lsrs	r5, lr, #28\n\t"
        "orr	r5, r5, r12, lsl 4\n\t"
        "orr	r4, r4, lr, lsl 4\n\t"
        "lsls	r6, r12, #30\n\t"
        "lsls	r7, lr, #30\n\t"
        "orr	r7, r7, r12, lsr 2\n\t"
        "orr	r6, r6, lr, lsr 2\n\t"
        "eor	r4, r4, r6\n\t"
        "eor	r5, r5, r7\n\t"
        "lsls	r6, r12, #25\n\t"
        "lsls	r7, lr, #25\n\t"
        "orr	r7, r7, r12, lsr 7\n\t"
        "orr	r6, r6, lr, lsr 7\n\t"
        "ldrd	r12, lr, [%[sha512], #32]\n\t"
        "eor	r4, r4, r6\n\t"
        "eor	r5, r5, r7\n\t"
        "adds	r12, r12, r4\n\t"
        "adc	lr, lr, r5\n\t"
        "ldrd	r6, r7, [%[sha512], #40]\n\t"
        "ldrd	r4, r5, [%[sha512], #48]\n\t"
        "strd	r12, lr, [%[sha512], #32]\n\t"
        "eor	r6, r6, r4\n\t"
        "eor	r7, r7, r5\n\t"
        "and	r8, r8, r6\n\t"
        "and	r9, r9, r7\n\t"
        "eor	r8, r8, r4\n\t"
        "eor	r9, r9, r5\n\t"
        "ldrd	r4, r5, [%[sha512], #32]\n\t"
        "adds	r4, r4, r8\n\t"
        "adc	r5, r5, r9\n\t"
        "strd	r4, r5, [%[sha512], #32]\n\t"
        "mov	r8, r6\n\t"
        "mov	r9, r7\n\t"
        /* Round 4 */
        "ldrd	r12, lr, [%[sha512]]\n\t"
        "lsrs	r4, r12, #14\n\t"
        "lsrs	r5, lr, #14\n\t"
        "orr	r5, r5, r12, lsl 18\n\t"
        "orr	r4, r4, lr, lsl 18\n\t"
        "lsrs	r6, r12, #18\n\t"
        "lsrs	r7, lr, #18\n\t"
        "orr	r7, r7, r12, lsl 14\n\t"
        "orr	r6, r6, lr, lsl 14\n\t"
        "eor	r4, r4, r6\n\t"
        "eor	r5, r5, r7\n\t"
        "lsls	r6, r12, #23\n\t"
        "lsls	r7, lr, #23\n\t"
        "orr	r7, r7, r12, lsr 9\n\t"
        "orr	r6, r6, lr, lsr 9\n\t"
        "ldrd	r12, lr, [%[sha512], #24]\n\t"
        "eor	r4, r4, r6\n\t"
        "eor	r5, r5, r7\n\t"
        "adds	r12, r12, r4\n\t"
        "adc	lr, lr, r5\n\t"
        "strd	r12, lr, [%[sha512], #24]\n\t"
        "ldrd	r12, lr, [%[sha512]]\n\t"
        "ldrd	r4, r5, [%[sha512], #8]\n\t"
        "ldrd	r6, r7, [%[sha512], #16]\n\t"
        "eor	r4, r4, r6\n\t"
        "eor	r5, r5, r7\n\t"
        "and	r4, r4, r12\n\t"
        "and	r5, r5, lr\n\t"
        "eor	r4, r4, r6\n\t"
        "eor	r5, r5, r7\n\t"
        "ldrd	r12, lr, [%[sha512], #24]\n\t"
        "ldrd	r6, r7, [sp, #32]\n\t"
        "adds	r12, r12, r4\n\t"
        "adc	lr, lr, r5\n\t"
        "ldrd	r4, r5, [r3, #32]\n\t"
        "adds	r12, r12, r6\n\t"
        "adc	lr, lr, r7\n\t"
        "ldrd	r6, r7, [%[sha512], #56]\n\t"
        "adds	r12, r12, r4\n\t"
        "adc	lr, lr, r5\n\t"
        "strd	r12, lr, [%[sha512], #24]\n\t"
        "adds	r6, r6, r12\n\t"
        "adc	r7, r7, lr\n\t"
        "ldrd	r12, lr, [%[sha512], #32]\n\t"
        "strd	r6, r7, [%[sha512], #56]\n\t"
        "lsrs	r4, r12, #28\n\t"
        "lsrs	r5, lr, #28\n\t"
        "orr	r5, r5, r12, lsl 4\n\t"
        "orr	r4, r4, lr, lsl 4\n\t"
        "lsls	r6, r12, #30\n\t"
        "lsls	r7, lr, #30\n\t"
        "orr	r7, r7, r12, lsr 2\n\t"
        "orr	r6, r6, lr, lsr 2\n\t"
        "eor	r4, r4, r6\n\t"
        "eor	r5, r5, r7\n\t"
        "lsls	r6, r12, #25\n\t"
        "lsls	r7, lr, #25\n\t"
        "orr	r7, r7, r12, lsr 7\n\t"
        "orr	r6, r6, lr, lsr 7\n\t"
        "ldrd	r12, lr, [%[sha512], #24]\n\t"
        "eor	r4, r4, r6\n\t"
        "eor	r5, r5, r7\n\t"
        "adds	r12, r12, r4\n\t"
        "adc	lr, lr, r5\n\t"
        "ldrd	r6, r7, [%[sha512], #32]\n\t"
        "ldrd	r4, r5, [%[sha512], #40]\n\t"
        "strd	r12, lr, [%[sha512], #24]\n\t"
        "eor	r6, r6, r4\n\t"
        "eor	r7, r7, r5\n\t"
        "and	r8, r8, r6\n\t"
        "and	r9, r9, r7\n\t"
        "eor	r8, r8, r4\n\t"
        "eor	r9, r9, r5\n\t"
        "ldrd	r4, r5, [%[sha512], #24]\n\t"
        "adds	r4, r4, r8\n\t"
        "adc	r5, r5, r9\n\t"
        "strd	r4, r5, [%[sha512], #24]\n\t"
        "mov	r8, r6\n\t"
        "mov	r9, r7\n\t"
        /* Round 5 */
        "ldrd	r12, lr, [%[sha512], #56]\n\t"
        "lsrs	r4, r12, #14\n\t"
        "lsrs	r5, lr, #14\n\t"
        "orr	r5, r5, r12, lsl 18\n\t"
        "orr	r4, r4, lr, lsl 18\n\t"
        "lsrs	r6, r12, #18\n\t"
        "lsrs	r7, lr, #18\n\t"
        "orr	r7, r7, r12, lsl 14\n\t"
        "orr	r6, r6, lr, lsl 14\n\t"
        "eor	r4, r4, r6\n\t"
        "eor	r5, r5, r7\n\t"
        "lsls	r6, r12, #23\n\t"
        "lsls	r7, lr, #23\n\t"
        "orr	r7, r7, r12, lsr 9\n\t"
        "orr	r6, r6, lr, lsr 9\n\t"
        "ldrd	r12, lr, [%[sha512], #16]\n\t"
        "eor	r4, r4, r6\n\t"
        "eor	r5, r5, r7\n\t"
        "adds	r12, r12, r4\n\t"
        "adc	lr, lr, r5\n\t"
        "strd	r12, lr, [%[sha512], #16]\n\t"
        "ldrd	r12, lr, [%[sha512], #56]\n\t"
        "ldrd	r4, r5, [%[sha512]]\n\t"
        "ldrd	r6, r7, [%[sha512], #8]\n\t"
        "eor	r4, r4, r6\n\t"
        "eor	r5, r5, r7\n\t"
        "and	r4, r4, r12\n\t"
        "and	r5, r5, lr\n\t"
        "eor	r4, r4, r6\n\t"
        "eor	r5, r5, r7\n\t"
        "ldrd	r12, lr, [%[sha512], #16]\n\t"
        "ldrd	r6, r7, [sp, #40]\n\t"
        "adds	r12, r12, r4\n\t"
        "adc	lr, lr, r5\n\t"
        "ldrd	r4, r5, [r3, #40]\n\t"
        "adds	r12, r12, r6\n\t"
        "adc	lr, lr, r7\n\t"
        "ldrd	r6, r7, [%[sha512], #48]\n\t"
        "adds	r12, r12, r4\n\t"
        "adc	lr, lr, r5\n\t"
        "strd	r12, lr, [%[sha512], #16]\n\t"
        "adds	r6, r6, r12\n\t"
        "adc	r7, r7, lr\n\t"
        "ldrd	r12, lr, [%[sha512], #24]\n\t"
        "strd	r6, r7, [%[sha512], #48]\n\t"
        "lsrs	r4, r12, #28\n\t"
        "lsrs	r5, lr, #28\n\t"
        "orr	r5, r5, r12, lsl 4\n\t"
        "orr	r4, r4, lr, lsl 4\n\t"
        "lsls	r6, r12, #30\n\t"
        "lsls	r7, lr, #30\n\t"
        "orr	r7, r7, r12, lsr 2\n\t"
        "orr	r6, r6, lr, lsr 2\n\t"
        "eor	r4, r4, r6\n\t"
        "eor	r5, r5, r7\n\t"
        "lsls	r6, r12, #25\n\t"
        "lsls	r7, lr, #25\n\t"
        "orr	r7, r7, r12, lsr 7\n\t"
        "orr	r6, r6, lr, lsr 7\n\t"
        "ldrd	r12, lr, [%[sha512], #16]\n\t"
        "eor	r4, r4, r6\n\t"
        "eor	r5, r5, r7\n\t"
        "adds	r12, r12, r4\n\t"
        "adc	lr, lr, r5\n\t"
        "ldrd	r6, r7, [%[sha512], #24]\n\t"
        "ldrd	r4, r5, [%[sha512], #32]\n\t"
        "strd	r12, lr, [%[sha512], #16]\n\t"
        "eor	r6, r6, r4\n\t"
        "eor	r7, r7, r5\n\t"
        "and	r8, r8, r6\n\t"
        "and	r9, r9, r7\n\t"
        "eor	r8, r8, r4\n\t"
        "eor	r9, r9, r5\n\t"
        "ldrd	r4, r5, [%[sha512], #16]\n\t"
        "adds	r4, r4, r8\n\t"
        "adc	r5, r5, r9\n\t"
        "strd	r4, r5, [%[sha512], #16]\n\t"
        "mov	r8, r6\n\t"
        "mov	r9, r7\n\t"
        /* Round 6 */
        "ldrd	r12, lr, [%[sha512], #48]\n\t"
        "lsrs	r4, r12, #14\n\t"
        "lsrs	r5, lr, #14\n\t"
        "orr	r5, r5, r12, lsl 18\n\t"
        "orr	r4, r4, lr, lsl 18\n\t"
        "lsrs	r6, r12, #18\n\t"
        "lsrs	r7, lr, #18\n\t"
        "orr	r7, r7, r12, lsl 14\n\t"
        "orr	r6, r6, lr, lsl 14\n\t"
        "eor	r4, r4, r6\n\t"
        "eor	r5, r5, r7\n\t"
        "lsls	r6, r12, #23\n\t"
        "lsls	r7, lr, #23\n\t"
        "orr	r7, r7, r12, lsr 9\n\t"
        "orr	r6, r6, lr, lsr 9\n\t"
        "ldrd	r12, lr, [%[sha512], #8]\n\t"
        "eor	r4, r4, r6\n\t"
        "eor	r5, r5, r7\n\t"
        "adds	r12, r12, r4\n\t"
        "adc	lr, lr, r5\n\t"
        "strd	r12, lr, [%[sha512], #8]\n\t"
        "ldrd	r12, lr, [%[sha512], #48]\n\t"
        "ldrd	r4, r5, [%[sha512], #56]\n\t"
        "ldrd	r6, r7, [%[sha512]]\n\t"
        "eor	r4, r4, r6\n\t"
        "eor	r5, r5, r7\n\t"
        "and	r4, r4, r12\n\t"
        "and	r5, r5, lr\n\t"
        "eor	r4, r4, r6\n\t"
        "eor	r5, r5, r7\n\t"
        "ldrd	r12, lr, [%[sha512], #8]\n\t"
        "ldrd	r6, r7, [sp, #48]\n\t"
        "adds	r12, r12, r4\n\t"
        "adc	lr, lr, r5\n\t"
        "ldrd	r4, r5, [r3, #48]\n\t"
        "adds	r12, r12, r6\n\t"
        "adc	lr, lr, r7\n\t"
        "ldrd	r6, r7, [%[sha512], #40]\n\t"
        "adds	r12, r12, r4\n\t"
        "adc	lr, lr, r5\n\t"
        "strd	r12, lr, [%[sha512], #8]\n\t"
        "adds	r6, r6, r12\n\t"
        "adc	r7, r7, lr\n\t"
        "ldrd	r12, lr, [%[sha512], #16]\n\t"
        "strd	r6, r7, [%[sha512], #40]\n\t"
        "lsrs	r4, r12, #28\n\t"
        "lsrs	r5, lr, #28\n\t"
        "orr	r5, r5, r12, lsl 4\n\t"
        "orr	r4, r4, lr, lsl 4\n\t"
        "lsls	r6, r12, #30\n\t"
        "lsls	r7, lr, #30\n\t"
        "orr	r7, r7, r12, lsr 2\n\t"
        "orr	r6, r6, lr, lsr 2\n\t"
        "eor	r4, r4, r6\n\t"
        "eor	r5, r5, r7\n\t"
        "lsls	r6, r12, #25\n\t"
        "lsls	r7, lr, #25\n\t"
        "orr	r7, r7, r12, lsr 7\n\t"
        "orr	r6, r6, lr, lsr 7\n\t"
        "ldrd	r12, lr, [%[sha512], #8]\n\t"
        "eor	r4, r4, r6\n\t"
        "eor	r5, r5, r7\n\t"
        "adds	r12, r12, r4\n\t"
        "adc	lr, lr, r5\n\t"
        "ldrd	r6, r7, [%[sha512], #16]\n\t"
        "ldrd	r4, r5, [%[sha512], #24]\n\t"
        "strd	r12, lr, [%[sha512], #8]\n\t"
        "eor	r6, r6, r4\n\t"
        "eor	r7, r7, r5\n\t"
        "and	r8, r8, r6\n\t"
        "and	r9, r9, r7\n\t"
        "eor	r8, r8, r4\n\t"
        "eor	r9, r9, r5\n\t"
        "ldrd	r4, r5, [%[sha512], #8]\n\t"
        "adds	r4, r4, r8\n\t"
        "adc	r5, r5, r9\n\t"
        "strd	r4, r5, [%[sha512], #8]\n\t"
        "mov	r8, r6\n\t"
        "mov	r9, r7\n\t"
        /* Round 7 */
        "ldrd	r12, lr, [%[sha512], #40]\n\t"
        "lsrs	r4, r12, #14\n\t"
        "lsrs	r5, lr, #14\n\t"
        "orr	r5, r5, r12, lsl 18\n\t"
        "orr	r4, r4, lr, lsl 18\n\t"
        "lsrs	r6, r12, #18\n\t"
        "lsrs	r7, lr, #18\n\t"
        "orr	r7, r7, r12, lsl 14\n\t"
        "orr	r6, r6, lr, lsl 14\n\t"
        "eor	r4, r4, r6\n\t"
        "eor	r5, r5, r7\n\t"
        "lsls	r6, r12, #23\n\t"
        "lsls	r7, lr, #23\n\t"
        "orr	r7, r7, r12, lsr 9\n\t"
        "orr	r6, r6, lr, lsr 9\n\t"
        "ldrd	r12, lr, [%[sha512]]\n\t"
        "eor	r4, r4, r6\n\t"
        "eor	r5, r5, r7\n\t"
        "adds	r12, r12, r4\n\t"
        "adc	lr, lr, r5\n\t"
        "strd	r12, lr, [%[sha512]]\n\t"
        "ldrd	r12, lr, [%[sha512], #40]\n\t"
        "ldrd	r4, r5, [%[sha512], #48]\n\t"
        "ldrd	r6, r7, [%[sha512], #56]\n\t"
        "eor	r4, r4, r6\n\t"
        "eor	r5, r5, r7\n\t"
        "and	r4, r4, r12\n\t"
        "and	r5, r5, lr\n\t"
        "eor	r4, r4, r6\n\t"
        "eor	r5, r5, r7\n\t"
        "ldrd	r12, lr, [%[sha512]]\n\t"
        "ldrd	r6, r7, [sp, #56]\n\t"
        "adds	r12, r12, r4\n\t"
        "adc	lr, lr, r5\n\t"
        "ldrd	r4, r5, [r3, #56]\n\t"
        "adds	r12, r12, r6\n\t"
        "adc	lr, lr, r7\n\t"
        "ldrd	r6, r7, [%[sha512], #32]\n\t"
        "adds	r12, r12, r4\n\t"
        "adc	lr, lr, r5\n\t"
        "strd	r12, lr, [%[sha512]]\n\t"
        "adds	r6, r6, r12\n\t"
        "adc	r7, r7, lr\n\t"
        "ldrd	r12, lr, [%[sha512], #8]\n\t"
        "strd	r6, r7, [%[sha512], #32]\n\t"
        "lsrs	r4, r12, #28\n\t"
        "lsrs	r5, lr, #28\n\t"
        "orr	r5, r5, r12, lsl 4\n\t"
        "orr	r4, r4, lr, lsl 4\n\t"
        "lsls	r6, r12, #30\n\t"
        "lsls	r7, lr, #30\n\t"
        "orr	r7, r7, r12, lsr 2\n\t"
        "orr	r6, r6, lr, lsr 2\n\t"
        "eor	r4, r4, r6\n\t"
        "eor	r5, r5, r7\n\t"
        "lsls	r6, r12, #25\n\t"
        "lsls	r7, lr, #25\n\t"
        "orr	r7, r7, r12, lsr 7\n\t"
        "orr	r6, r6, lr, lsr 7\n\t"
        "ldrd	r12, lr, [%[sha512]]\n\t"
        "eor	r4, r4, r6\n\t"
        "eor	r5, r5, r7\n\t"
        "adds	r12, r12, r4\n\t"
        "adc	lr, lr, r5\n\t"
        "ldrd	r6, r7, [%[sha512], #8]\n\t"
        "ldrd	r4, r5, [%[sha512], #16]\n\t"
        "strd	r12, lr, [%[sha512]]\n\t"
        "eor	r6, r6, r4\n\t"
        "eor	r7, r7, r5\n\t"
        "and	r8, r8, r6\n\t"
        "and	r9, r9, r7\n\t"
        "eor	r8, r8, r4\n\t"
        "eor	r9, r9, r5\n\t"
        "ldrd	r4, r5, [%[sha512]]\n\t"
        "adds	r4, r4, r8\n\t"
        "adc	r5, r5, r9\n\t"
        "strd	r4, r5, [%[sha512]]\n\t"
        "mov	r8, r6\n\t"
        "mov	r9, r7\n\t"
        /* Round 8 */
        "ldrd	r12, lr, [%[sha512], #32]\n\t"
        "lsrs	r4, r12, #14\n\t"
        "lsrs	r5, lr, #14\n\t"
        "orr	r5, r5, r12, lsl 18\n\t"
        "orr	r4, r4, lr, lsl 18\n\t"
        "lsrs	r6, r12, #18\n\t"
        "lsrs	r7, lr, #18\n\t"
        "orr	r7, r7, r12, lsl 14\n\t"
        "orr	r6, r6, lr, lsl 14\n\t"
        "eor	r4, r4, r6\n\t"
        "eor	r5, r5, r7\n\t"
        "lsls	r6, r12, #23\n\t"
        "lsls	r7, lr, #23\n\t"
        "orr	r7, r7, r12, lsr 9\n\t"
        "orr	r6, r6, lr, lsr 9\n\t"
        "ldrd	r12, lr, [%[sha512], #56]\n\t"
        "eor	r4, r4, r6\n\t"
        "eor	r5, r5, r7\n\t"
        "adds	r12, r12, r4\n\t"
        "adc	lr, lr, r5\n\t"
        "strd	r12, lr, [%[sha512], #56]\n\t"
        "ldrd	r12, lr, [%[sha512], #32]\n\t"
        "ldrd	r4, r5, [%[sha512], #40]\n\t"
        "ldrd	r6, r7, [%[sha512], #48]\n\t"
        "eor	r4, r4, r6\n\t"
        "eor	r5, r5, r7\n\t"
        "and	r4, r4, r12\n\t"
        "and	r5, r5, lr\n\t"
        "eor	r4, r4, r6\n\t"
        "eor	r5, r5, r7\n\t"
        "ldrd	r12, lr, [%[sha512], #56]\n\t"
        "ldrd	r6, r7, [sp, #64]\n\t"
        "adds	r12, r12, r4\n\t"
        "adc	lr, lr, r5\n\t"
        "ldrd	r4, r5, [r3, #64]\n\t"
        "adds	r12, r12, r6\n\t"
        "adc	lr, lr, r7\n\t"
        "ldrd	r6, r7, [%[sha512], #24]\n\t"
        "adds	r12, r12, r4\n\t"
        "adc	lr, lr, r5\n\t"
        "strd	r12, lr, [%[sha512], #56]\n\t"
        "adds	r6, r6, r12\n\t"
        "adc	r7, r7, lr\n\t"
        "ldrd	r12, lr, [%[sha512]]\n\t"
        "strd	r6, r7, [%[sha512], #24]\n\t"
        "lsrs	r4, r12, #28\n\t"
        "lsrs	r5, lr, #28\n\t"
        "orr	r5, r5, r12, lsl 4\n\t"
        "orr	r4, r4, lr, lsl 4\n\t"
        "lsls	r6, r12, #30\n\t"
        "lsls	r7, lr, #30\n\t"
        "orr	r7, r7, r12, lsr 2\n\t"
        "orr	r6, r6, lr, lsr 2\n\t"
        "eor	r4, r4, r6\n\t"
        "eor	r5, r5, r7\n\t"
        "lsls	r6, r12, #25\n\t"
        "lsls	r7, lr, #25\n\t"
        "orr	r7, r7, r12, lsr 7\n\t"
        "orr	r6, r6, lr, lsr 7\n\t"
        "ldrd	r12, lr, [%[sha512], #56]\n\t"
        "eor	r4, r4, r6\n\t"
        "eor	r5, r5, r7\n\t"
        "adds	r12, r12, r4\n\t"
        "adc	lr, lr, r5\n\t"
        "ldrd	r6, r7, [%[sha512]]\n\t"
        "ldrd	r4, r5, [%[sha512], #8]\n\t"
        "strd	r12, lr, [%[sha512], #56]\n\t"
        "eor	r6, r6, r4\n\t"
        "eor	r7, r7, r5\n\t"
        "and	r8, r8, r6\n\t"
        "and	r9, r9, r7\n\t"
        "eor	r8, r8, r4\n\t"
        "eor	r9, r9, r5\n\t"
        "ldrd	r4, r5, [%[sha512], #56]\n\t"
        "adds	r4, r4, r8\n\t"
        "adc	r5, r5, r9\n\t"
        "strd	r4, r5, [%[sha512], #56]\n\t"
        "mov	r8, r6\n\t"
        "mov	r9, r7\n\t"
        /* Round 9 */
        "ldrd	r12, lr, [%[sha512], #24]\n\t"
        "lsrs	r4, r12, #14\n\t"
        "lsrs	r5, lr, #14\n\t"
        "orr	r5, r5, r12, lsl 18\n\t"
        "orr	r4, r4, lr, lsl 18\n\t"
        "lsrs	r6, r12, #18\n\t"
        "lsrs	r7, lr, #18\n\t"
        "orr	r7, r7, r12, lsl 14\n\t"
        "orr	r6, r6, lr, lsl 14\n\t"
        "eor	r4, r4, r6\n\t"
        "eor	r5, r5, r7\n\t"
        "lsls	r6, r12, #23\n\t"
        "lsls	r7, lr, #23\n\t"
        "orr	r7, r7, r12, lsr 9\n\t"
        "orr	r6, r6, lr, lsr 9\n\t"
        "ldrd	r12, lr, [%[sha512], #48]\n\t"
        "eor	r4, r4, r6\n\t"
        "eor	r5, r5, r7\n\t"
        "adds	r12, r12, r4\n\t"
        "adc	lr, lr, r5\n\t"
        "strd	r12, lr, [%[sha512], #48]\n\t"
        "ldrd	r12, lr, [%[sha512], #24]\n\t"
        "ldrd	r4, r5, [%[sha512], #32]\n\t"
        "ldrd	r6, r7, [%[sha512], #40]\n\t"
        "eor	r4, r4, r6\n\t"
        "eor	r5, r5, r7\n\t"
        "and	r4, r4, r12\n\t"
        "and	r5, r5, lr\n\t"
        "eor	r4, r4, r6\n\t"
        "eor	r5, r5, r7\n\t"
        "ldrd	r12, lr, [%[sha512], #48]\n\t"
        "ldrd	r6, r7, [sp, #72]\n\t"
        "adds	r12, r12, r4\n\t"
        "adc	lr, lr, r5\n\t"
        "ldrd	r4, r5, [r3, #72]\n\t"
        "adds	r12, r12, r6\n\t"
        "adc	lr, lr, r7\n\t"
        "ldrd	r6, r7, [%[sha512], #16]\n\t"
        "adds	r12, r12, r4\n\t"
        "adc	lr, lr, r5\n\t"
        "strd	r12, lr, [%[sha512], #48]\n\t"
        "adds	r6, r6, r12\n\t"
        "adc	r7, r7, lr\n\t"
        "ldrd	r12, lr, [%[sha512], #56]\n\t"
        "strd	r6, r7, [%[sha512], #16]\n\t"
        "lsrs	r4, r12, #28\n\t"
        "lsrs	r5, lr, #28\n\t"
        "orr	r5, r5, r12, lsl 4\n\t"
        "orr	r4, r4, lr, lsl 4\n\t"
        "lsls	r6, r12, #30\n\t"
        "lsls	r7, lr, #30\n\t"
        "orr	r7, r7, r12, lsr 2\n\t"
        "orr	r6, r6, lr, lsr 2\n\t"
        "eor	r4, r4, r6\n\t"
        "eor	r5, r5, r7\n\t"
        "lsls	r6, r12, #25\n\t"
        "lsls	r7, lr, #25\n\t"
        "orr	r7, r7, r12, lsr 7\n\t"
        "orr	r6, r6, lr, lsr 7\n\t"
        "ldrd	r12, lr, [%[sha512], #48]\n\t"
        "eor	r4, r4, r6\n\t"
        "eor	r5, r5, r7\n\t"
        "adds	r12, r12, r4\n\t"
        "adc	lr, lr, r5\n\t"
        "ldrd	r6, r7, [%[sha512], #56]\n\t"
        "ldrd	r4, r5, [%[sha512]]\n\t"
        "strd	r12, lr, [%[sha512], #48]\n\t"
        "eor	r6, r6, r4\n\t"
        "eor	r7, r7, r5\n\t"
        "and	r8, r8, r6\n\t"
        "and	r9, r9, r7\n\t"
        "eor	r8, r8, r4\n\t"
        "eor	r9, r9, r5\n\t"
        "ldrd	r4, r5, [%[sha512], #48]\n\t"
        "adds	r4, r4, r8\n\t"
        "adc	r5, r5, r9\n\t"
        "strd	r4, r5, [%[sha512], #48]\n\t"
        "mov	r8, r6\n\t"
        "mov	r9, r7\n\t"
        /* Round 10 */
        "ldrd	r12, lr, [%[sha512], #16]\n\t"
        "lsrs	r4, r12, #14\n\t"
        "lsrs	r5, lr, #14\n\t"
        "orr	r5, r5, r12, lsl 18\n\t"
        "orr	r4, r4, lr, lsl 18\n\t"
        "lsrs	r6, r12, #18\n\t"
        "lsrs	r7, lr, #18\n\t"
        "orr	r7, r7, r12, lsl 14\n\t"
        "orr	r6, r6, lr, lsl 14\n\t"
        "eor	r4, r4, r6\n\t"
        "eor	r5, r5, r7\n\t"
        "lsls	r6, r12, #23\n\t"
        "lsls	r7, lr, #23\n\t"
        "orr	r7, r7, r12, lsr 9\n\t"
        "orr	r6, r6, lr, lsr 9\n\t"
        "ldrd	r12, lr, [%[sha512], #40]\n\t"
        "eor	r4, r4, r6\n\t"
        "eor	r5, r5, r7\n\t"
        "adds	r12, r12, r4\n\t"
        "adc	lr, lr, r5\n\t"
        "strd	r12, lr, [%[sha512], #40]\n\t"
        "ldrd	r12, lr, [%[sha512], #16]\n\t"
        "ldrd	r4, r5, [%[sha512], #24]\n\t"
        "ldrd	r6, r7, [%[sha512], #32]\n\t"
        "eor	r4, r4, r6\n\t"
        "eor	r5, r5, r7\n\t"
        "and	r4, r4, r12\n\t"
        "and	r5, r5, lr\n\t"
        "eor	r4, r4, r6\n\t"
        "eor	r5, r5, r7\n\t"
        "ldrd	r12, lr, [%[sha512], #40]\n\t"
        "ldrd	r6, r7, [sp, #80]\n\t"
        "adds	r12, r12, r4\n\t"
        "adc	lr, lr, r5\n\t"
        "ldrd	r4, r5, [r3, #80]\n\t"
        "adds	r12, r12, r6\n\t"
        "adc	lr, lr, r7\n\t"
        "ldrd	r6, r7, [%[sha512], #8]\n\t"
        "adds	r12, r12, r4\n\t"
        "adc	lr, lr, r5\n\t"
        "strd	r12, lr, [%[sha512], #40]\n\t"
        "adds	r6, r6, r12\n\t"
        "adc	r7, r7, lr\n\t"
        "ldrd	r12, lr, [%[sha512], #48]\n\t"
        "strd	r6, r7, [%[sha512], #8]\n\t"
        "lsrs	r4, r12, #28\n\t"
        "lsrs	r5, lr, #28\n\t"
        "orr	r5, r5, r12, lsl 4\n\t"
        "orr	r4, r4, lr, lsl 4\n\t"
        "lsls	r6, r12, #30\n\t"
        "lsls	r7, lr, #30\n\t"
        "orr	r7, r7, r12, lsr 2\n\t"
        "orr	r6, r6, lr, lsr 2\n\t"
        "eor	r4, r4, r6\n\t"
        "eor	r5, r5, r7\n\t"
        "lsls	r6, r12, #25\n\t"
        "lsls	r7, lr, #25\n\t"
        "orr	r7, r7, r12, lsr 7\n\t"
        "orr	r6, r6, lr, lsr 7\n\t"
        "ldrd	r12, lr, [%[sha512], #40]\n\t"
        "eor	r4, r4, r6\n\t"
        "eor	r5, r5, r7\n\t"
        "adds	r12, r12, r4\n\t"
        "adc	lr, lr, r5\n\t"
        "ldrd	r6, r7, [%[sha512], #48]\n\t"
        "ldrd	r4, r5, [%[sha512], #56]\n\t"
        "strd	r12, lr, [%[sha512], #40]\n\t"
        "eor	r6, r6, r4\n\t"
        "eor	r7, r7, r5\n\t"
        "and	r8, r8, r6\n\t"
        "and	r9, r9, r7\n\t"
        "eor	r8, r8, r4\n\t"
        "eor	r9, r9, r5\n\t"
        "ldrd	r4, r5, [%[sha512], #40]\n\t"
        "adds	r4, r4, r8\n\t"
        "adc	r5, r5, r9\n\t"
        "strd	r4, r5, [%[sha512], #40]\n\t"
        "mov	r8, r6\n\t"
        "mov	r9, r7\n\t"
        /* Round 11 */
        "ldrd	r12, lr, [%[sha512], #8]\n\t"
        "lsrs	r4, r12, #14\n\t"
        "lsrs	r5, lr, #14\n\t"
        "orr	r5, r5, r12, lsl 18\n\t"
        "orr	r4, r4, lr, lsl 18\n\t"
        "lsrs	r6, r12, #18\n\t"
        "lsrs	r7, lr, #18\n\t"
        "orr	r7, r7, r12, lsl 14\n\t"
        "orr	r6, r6, lr, lsl 14\n\t"
        "eor	r4, r4, r6\n\t"
        "eor	r5, r5, r7\n\t"
        "lsls	r6, r12, #23\n\t"
        "lsls	r7, lr, #23\n\t"
        "orr	r7, r7, r12, lsr 9\n\t"
        "orr	r6, r6, lr, lsr 9\n\t"
        "ldrd	r12, lr, [%[sha512], #32]\n\t"
        "eor	r4, r4, r6\n\t"
        "eor	r5, r5, r7\n\t"
        "adds	r12, r12, r4\n\t"
        "adc	lr, lr, r5\n\t"
        "strd	r12, lr, [%[sha512], #32]\n\t"
        "ldrd	r12, lr, [%[sha512], #8]\n\t"
        "ldrd	r4, r5, [%[sha512], #16]\n\t"
        "ldrd	r6, r7, [%[sha512], #24]\n\t"
        "eor	r4, r4, r6\n\t"
        "eor	r5, r5, r7\n\t"
        "and	r4, r4, r12\n\t"
        "and	r5, r5, lr\n\t"
        "eor	r4, r4, r6\n\t"
        "eor	r5, r5, r7\n\t"
        "ldrd	r12, lr, [%[sha512], #32]\n\t"
        "ldrd	r6, r7, [sp, #88]\n\t"
        "adds	r12, r12, r4\n\t"
        "adc	lr, lr, r5\n\t"
        "ldrd	r4, r5, [r3, #88]\n\t"
        "adds	r12, r12, r6\n\t"
        "adc	lr, lr, r7\n\t"
        "ldrd	r6, r7, [%[sha512]]\n\t"
        "adds	r12, r12, r4\n\t"
        "adc	lr, lr, r5\n\t"
        "strd	r12, lr, [%[sha512], #32]\n\t"
        "adds	r6, r6, r12\n\t"
        "adc	r7, r7, lr\n\t"
        "ldrd	r12, lr, [%[sha512], #40]\n\t"
        "strd	r6, r7, [%[sha512]]\n\t"
        "lsrs	r4, r12, #28\n\t"
        "lsrs	r5, lr, #28\n\t"
        "orr	r5, r5, r12, lsl 4\n\t"
        "orr	r4, r4, lr, lsl 4\n\t"
        "lsls	r6, r12, #30\n\t"
        "lsls	r7, lr, #30\n\t"
        "orr	r7, r7, r12, lsr 2\n\t"
        "orr	r6, r6, lr, lsr 2\n\t"
        "eor	r4, r4, r6\n\t"
        "eor	r5, r5, r7\n\t"
        "lsls	r6, r12, #25\n\t"
        "lsls	r7, lr, #25\n\t"
        "orr	r7, r7, r12, lsr 7\n\t"
        "orr	r6, r6, lr, lsr 7\n\t"
        "ldrd	r12, lr, [%[sha512], #32]\n\t"
        "eor	r4, r4, r6\n\t"
        "eor	r5, r5, r7\n\t"
        "adds	r12, r12, r4\n\t"
        "adc	lr, lr, r5\n\t"
        "ldrd	r6, r7, [%[sha512], #40]\n\t"
        "ldrd	r4, r5, [%[sha512], #48]\n\t"
        "strd	r12, lr, [%[sha512], #32]\n\t"
        "eor	r6, r6, r4\n\t"
        "eor	r7, r7, r5\n\t"
        "and	r8, r8, r6\n\t"
        "and	r9, r9, r7\n\t"
        "eor	r8, r8, r4\n\t"
        "eor	r9, r9, r5\n\t"
        "ldrd	r4, r5, [%[sha512], #32]\n\t"
        "adds	r4, r4, r8\n\t"
        "adc	r5, r5, r9\n\t"
        "strd	r4, r5, [%[sha512], #32]\n\t"
        "mov	r8, r6\n\t"
        "mov	r9, r7\n\t"
        /* Round 12 */
        "ldrd	r12, lr, [%[sha512]]\n\t"
        "lsrs	r4, r12, #14\n\t"
        "lsrs	r5, lr, #14\n\t"
        "orr	r5, r5, r12, lsl 18\n\t"
        "orr	r4, r4, lr, lsl 18\n\t"
        "lsrs	r6, r12, #18\n\t"
        "lsrs	r7, lr, #18\n\t"
        "orr	r7, r7, r12, lsl 14\n\t"
        "orr	r6, r6, lr, lsl 14\n\t"
        "eor	r4, r4, r6\n\t"
        "eor	r5, r5, r7\n\t"
        "lsls	r6, r12, #23\n\t"
        "lsls	r7, lr, #23\n\t"
        "orr	r7, r7, r12, lsr 9\n\t"
        "orr	r6, r6, lr, lsr 9\n\t"
        "ldrd	r12, lr, [%[sha512], #24]\n\t"
        "eor	r4, r4, r6\n\t"
        "eor	r5, r5, r7\n\t"
        "adds	r12, r12, r4\n\t"
        "adc	lr, lr, r5\n\t"
        "strd	r12, lr, [%[sha512], #24]\n\t"
        "ldrd	r12, lr, [%[sha512]]\n\t"
        "ldrd	r4, r5, [%[sha512], #8]\n\t"
        "ldrd	r6, r7, [%[sha512], #16]\n\t"
        "eor	r4, r4, r6\n\t"
        "eor	r5, r5, r7\n\t"
        "and	r4, r4, r12\n\t"
        "and	r5, r5, lr\n\t"
        "eor	r4, r4, r6\n\t"
        "eor	r5, r5, r7\n\t"
        "ldrd	r12, lr, [%[sha512], #24]\n\t"
        "ldrd	r6, r7, [sp, #96]\n\t"
        "adds	r12, r12, r4\n\t"
        "adc	lr, lr, r5\n\t"
        "ldrd	r4, r5, [r3, #96]\n\t"
        "adds	r12, r12, r6\n\t"
        "adc	lr, lr, r7\n\t"
        "ldrd	r6, r7, [%[sha512], #56]\n\t"
        "adds	r12, r12, r4\n\t"
        "adc	lr, lr, r5\n\t"
        "strd	r12, lr, [%[sha512], #24]\n\t"
        "adds	r6, r6, r12\n\t"
        "adc	r7, r7, lr\n\t"
        "ldrd	r12, lr, [%[sha512], #32]\n\t"
        "strd	r6, r7, [%[sha512], #56]\n\t"
        "lsrs	r4, r12, #28\n\t"
        "lsrs	r5, lr, #28\n\t"
        "orr	r5, r5, r12, lsl 4\n\t"
        "orr	r4, r4, lr, lsl 4\n\t"
        "lsls	r6, r12, #30\n\t"
        "lsls	r7, lr, #30\n\t"
        "orr	r7, r7, r12, lsr 2\n\t"
        "orr	r6, r6, lr, lsr 2\n\t"
        "eor	r4, r4, r6\n\t"
        "eor	r5, r5, r7\n\t"
        "lsls	r6, r12, #25\n\t"
        "lsls	r7, lr, #25\n\t"
        "orr	r7, r7, r12, lsr 7\n\t"
        "orr	r6, r6, lr, lsr 7\n\t"
        "ldrd	r12, lr, [%[sha512], #24]\n\t"
        "eor	r4, r4, r6\n\t"
        "eor	r5, r5, r7\n\t"
        "adds	r12, r12, r4\n\t"
        "adc	lr, lr, r5\n\t"
        "ldrd	r6, r7, [%[sha512], #32]\n\t"
        "ldrd	r4, r5, [%[sha512], #40]\n\t"
        "strd	r12, lr, [%[sha512], #24]\n\t"
        "eor	r6, r6, r4\n\t"
        "eor	r7, r7, r5\n\t"
        "and	r8, r8, r6\n\t"
        "and	r9, r9, r7\n\t"
        "eor	r8, r8, r4\n\t"
        "eor	r9, r9, r5\n\t"
        "ldrd	r4, r5, [%[sha512], #24]\n\t"
        "adds	r4, r4, r8\n\t"
        "adc	r5, r5, r9\n\t"
        "strd	r4, r5, [%[sha512], #24]\n\t"
        "mov	r8, r6\n\t"
        "mov	r9, r7\n\t"
        /* Round 13 */
        "ldrd	r12, lr, [%[sha512], #56]\n\t"
        "lsrs	r4, r12, #14\n\t"
        "lsrs	r5, lr, #14\n\t"
        "orr	r5, r5, r12, lsl 18\n\t"
        "orr	r4, r4, lr, lsl 18\n\t"
        "lsrs	r6, r12, #18\n\t"
        "lsrs	r7, lr, #18\n\t"
        "orr	r7, r7, r12, lsl 14\n\t"
        "orr	r6, r6, lr, lsl 14\n\t"
        "eor	r4, r4, r6\n\t"
        "eor	r5, r5, r7\n\t"
        "lsls	r6, r12, #23\n\t"
        "lsls	r7, lr, #23\n\t"
        "orr	r7, r7, r12, lsr 9\n\t"
        "orr	r6, r6, lr, lsr 9\n\t"
        "ldrd	r12, lr, [%[sha512], #16]\n\t"
        "eor	r4, r4, r6\n\t"
        "eor	r5, r5, r7\n\t"
        "adds	r12, r12, r4\n\t"
        "adc	lr, lr, r5\n\t"
        "strd	r12, lr, [%[sha512], #16]\n\t"
        "ldrd	r12, lr, [%[sha512], #56]\n\t"
        "ldrd	r4, r5, [%[sha512]]\n\t"
        "ldrd	r6, r7, [%[sha512], #8]\n\t"
        "eor	r4, r4, r6\n\t"
        "eor	r5, r5, r7\n\t"
        "and	r4, r4, r12\n\t"
        "and	r5, r5, lr\n\t"
        "eor	r4, r4, r6\n\t"
        "eor	r5, r5, r7\n\t"
        "ldrd	r12, lr, [%[sha512], #16]\n\t"
        "ldrd	r6, r7, [sp, #104]\n\t"
        "adds	r12, r12, r4\n\t"
        "adc	lr, lr, r5\n\t"
        "ldrd	r4, r5, [r3, #104]\n\t"
        "adds	r12, r12, r6\n\t"
        "adc	lr, lr, r7\n\t"
        "ldrd	r6, r7, [%[sha512], #48]\n\t"
        "adds	r12, r12, r4\n\t"
        "adc	lr, lr, r5\n\t"
        "strd	r12, lr, [%[sha512], #16]\n\t"
        "adds	r6, r6, r12\n\t"
        "adc	r7, r7, lr\n\t"
        "ldrd	r12, lr, [%[sha512], #24]\n\t"
        "strd	r6, r7, [%[sha512], #48]\n\t"
        "lsrs	r4, r12, #28\n\t"
        "lsrs	r5, lr, #28\n\t"
        "orr	r5, r5, r12, lsl 4\n\t"
        "orr	r4, r4, lr, lsl 4\n\t"
        "lsls	r6, r12, #30\n\t"
        "lsls	r7, lr, #30\n\t"
        "orr	r7, r7, r12, lsr 2\n\t"
        "orr	r6, r6, lr, lsr 2\n\t"
        "eor	r4, r4, r6\n\t"
        "eor	r5, r5, r7\n\t"
        "lsls	r6, r12, #25\n\t"
        "lsls	r7, lr, #25\n\t"
        "orr	r7, r7, r12, lsr 7\n\t"
        "orr	r6, r6, lr, lsr 7\n\t"
        "ldrd	r12, lr, [%[sha512], #16]\n\t"
        "eor	r4, r4, r6\n\t"
        "eor	r5, r5, r7\n\t"
        "adds	r12, r12, r4\n\t"
        "adc	lr, lr, r5\n\t"
        "ldrd	r6, r7, [%[sha512], #24]\n\t"
        "ldrd	r4, r5, [%[sha512], #32]\n\t"
        "strd	r12, lr, [%[sha512], #16]\n\t"
        "eor	r6, r6, r4\n\t"
        "eor	r7, r7, r5\n\t"
        "and	r8, r8, r6\n\t"
        "and	r9, r9, r7\n\t"
        "eor	r8, r8, r4\n\t"
        "eor	r9, r9, r5\n\t"
        "ldrd	r4, r5, [%[sha512], #16]\n\t"
        "adds	r4, r4, r8\n\t"
        "adc	r5, r5, r9\n\t"
        "strd	r4, r5, [%[sha512], #16]\n\t"
        "mov	r8, r6\n\t"
        "mov	r9, r7\n\t"
        /* Round 14 */
        "ldrd	r12, lr, [%[sha512], #48]\n\t"
        "lsrs	r4, r12, #14\n\t"
        "lsrs	r5, lr, #14\n\t"
        "orr	r5, r5, r12, lsl 18\n\t"
        "orr	r4, r4, lr, lsl 18\n\t"
        "lsrs	r6, r12, #18\n\t"
        "lsrs	r7, lr, #18\n\t"
        "orr	r7, r7, r12, lsl 14\n\t"
        "orr	r6, r6, lr, lsl 14\n\t"
        "eor	r4, r4, r6\n\t"
        "eor	r5, r5, r7\n\t"
        "lsls	r6, r12, #23\n\t"
        "lsls	r7, lr, #23\n\t"
        "orr	r7, r7, r12, lsr 9\n\t"
        "orr	r6, r6, lr, lsr 9\n\t"
        "ldrd	r12, lr, [%[sha512], #8]\n\t"
        "eor	r4, r4, r6\n\t"
        "eor	r5, r5, r7\n\t"
        "adds	r12, r12, r4\n\t"
        "adc	lr, lr, r5\n\t"
        "strd	r12, lr, [%[sha512], #8]\n\t"
        "ldrd	r12, lr, [%[sha512], #48]\n\t"
        "ldrd	r4, r5, [%[sha512], #56]\n\t"
        "ldrd	r6, r7, [%[sha512]]\n\t"
        "eor	r4, r4, r6\n\t"
        "eor	r5, r5, r7\n\t"
        "and	r4, r4, r12\n\t"
        "and	r5, r5, lr\n\t"
        "eor	r4, r4, r6\n\t"
        "eor	r5, r5, r7\n\t"
        "ldrd	r12, lr, [%[sha512], #8]\n\t"
        "ldrd	r6, r7, [sp, #112]\n\t"
        "adds	r12, r12, r4\n\t"
        "adc	lr, lr, r5\n\t"
        "ldrd	r4, r5, [r3, #112]\n\t"
        "adds	r12, r12, r6\n\t"
        "adc	lr, lr, r7\n\t"
        "ldrd	r6, r7, [%[sha512], #40]\n\t"
        "adds	r12, r12, r4\n\t"
        "adc	lr, lr, r5\n\t"
        "strd	r12, lr, [%[sha512], #8]\n\t"
        "adds	r6, r6, r12\n\t"
        "adc	r7, r7, lr\n\t"
        "ldrd	r12, lr, [%[sha512], #16]\n\t"
        "strd	r6, r7, [%[sha512], #40]\n\t"
        "lsrs	r4, r12, #28\n\t"
        "lsrs	r5, lr, #28\n\t"
        "orr	r5, r5, r12, lsl 4\n\t"
        "orr	r4, r4, lr, lsl 4\n\t"
        "lsls	r6, r12, #30\n\t"
        "lsls	r7, lr, #30\n\t"
        "orr	r7, r7, r12, lsr 2\n\t"
        "orr	r6, r6, lr, lsr 2\n\t"
        "eor	r4, r4, r6\n\t"
        "eor	r5, r5, r7\n\t"
        "lsls	r6, r12, #25\n\t"
        "lsls	r7, lr, #25\n\t"
        "orr	r7, r7, r12, lsr 7\n\t"
        "orr	r6, r6, lr, lsr 7\n\t"
        "ldrd	r12, lr, [%[sha512], #8]\n\t"
        "eor	r4, r4, r6\n\t"
        "eor	r5, r5, r7\n\t"
        "adds	r12, r12, r4\n\t"
        "adc	lr, lr, r5\n\t"
        "ldrd	r6, r7, [%[sha512], #16]\n\t"
        "ldrd	r4, r5, [%[sha512], #24]\n\t"
        "strd	r12, lr, [%[sha512], #8]\n\t"
        "eor	r6, r6, r4\n\t"
        "eor	r7, r7, r5\n\t"
        "and	r8, r8, r6\n\t"
        "and	r9, r9, r7\n\t"
        "eor	r8, r8, r4\n\t"
        "eor	r9, r9, r5\n\t"
        "ldrd	r4, r5, [%[sha512], #8]\n\t"
        "adds	r4, r4, r8\n\t"
        "adc	r5, r5, r9\n\t"
        "strd	r4, r5, [%[sha512], #8]\n\t"
        "mov	r8, r6\n\t"
        "mov	r9, r7\n\t"
        /* Round 15 */
        "ldrd	r12, lr, [%[sha512], #40]\n\t"
        "lsrs	r4, r12, #14\n\t"
        "lsrs	r5, lr, #14\n\t"
        "orr	r5, r5, r12, lsl 18\n\t"
        "orr	r4, r4, lr, lsl 18\n\t"
        "lsrs	r6, r12, #18\n\t"
        "lsrs	r7, lr, #18\n\t"
        "orr	r7, r7, r12, lsl 14\n\t"
        "orr	r6, r6, lr, lsl 14\n\t"
        "eor	r4, r4, r6\n\t"
        "eor	r5, r5, r7\n\t"
        "lsls	r6, r12, #23\n\t"
        "lsls	r7, lr, #23\n\t"
        "orr	r7, r7, r12, lsr 9\n\t"
        "orr	r6, r6, lr, lsr 9\n\t"
        "ldrd	r12, lr, [%[sha512]]\n\t"
        "eor	r4, r4, r6\n\t"
        "eor	r5, r5, r7\n\t"
        "adds	r12, r12, r4\n\t"
        "adc	lr, lr, r5\n\t"
        "strd	r12, lr, [%[sha512]]\n\t"
        "ldrd	r12, lr, [%[sha512], #40]\n\t"
        "ldrd	r4, r5, [%[sha512], #48]\n\t"
        "ldrd	r6, r7, [%[sha512], #56]\n\t"
        "eor	r4, r4, r6\n\t"
        "eor	r5, r5, r7\n\t"
        "and	r4, r4, r12\n\t"
        "and	r5, r5, lr\n\t"
        "eor	r4, r4, r6\n\t"
        "eor	r5, r5, r7\n\t"
        "ldrd	r12, lr, [%[sha512]]\n\t"
        "ldrd	r6, r7, [sp, #120]\n\t"
        "adds	r12, r12, r4\n\t"
        "adc	lr, lr, r5\n\t"
        "ldrd	r4, r5, [r3, #120]\n\t"
        "adds	r12, r12, r6\n\t"
        "adc	lr, lr, r7\n\t"
        "ldrd	r6, r7, [%[sha512], #32]\n\t"
        "adds	r12, r12, r4\n\t"
        "adc	lr, lr, r5\n\t"
        "strd	r12, lr, [%[sha512]]\n\t"
        "adds	r6, r6, r12\n\t"
        "adc	r7, r7, lr\n\t"
        "ldrd	r12, lr, [%[sha512], #8]\n\t"
        "strd	r6, r7, [%[sha512], #32]\n\t"
        "lsrs	r4, r12, #28\n\t"
        "lsrs	r5, lr, #28\n\t"
        "orr	r5, r5, r12, lsl 4\n\t"
        "orr	r4, r4, lr, lsl 4\n\t"
        "lsls	r6, r12, #30\n\t"
        "lsls	r7, lr, #30\n\t"
        "orr	r7, r7, r12, lsr 2\n\t"
        "orr	r6, r6, lr, lsr 2\n\t"
        "eor	r4, r4, r6\n\t"
        "eor	r5, r5, r7\n\t"
        "lsls	r6, r12, #25\n\t"
        "lsls	r7, lr, #25\n\t"
        "orr	r7, r7, r12, lsr 7\n\t"
        "orr	r6, r6, lr, lsr 7\n\t"
        "ldrd	r12, lr, [%[sha512]]\n\t"
        "eor	r4, r4, r6\n\t"
        "eor	r5, r5, r7\n\t"
        "adds	r12, r12, r4\n\t"
        "adc	lr, lr, r5\n\t"
        "ldrd	r6, r7, [%[sha512], #8]\n\t"
        "ldrd	r4, r5, [%[sha512], #16]\n\t"
        "strd	r12, lr, [%[sha512]]\n\t"
        "eor	r6, r6, r4\n\t"
        "eor	r7, r7, r5\n\t"
        "and	r8, r8, r6\n\t"
        "and	r9, r9, r7\n\t"
        "eor	r8, r8, r4\n\t"
        "eor	r9, r9, r5\n\t"
        "ldrd	r4, r5, [%[sha512]]\n\t"
        "adds	r4, r4, r8\n\t"
        "adc	r5, r5, r9\n\t"
        "strd	r4, r5, [%[sha512]]\n\t"
        "mov	r8, r6\n\t"
        "mov	r9, r7\n\t"
        /* Add in digest from start */
        "ldrd	r12, lr, [%[sha512]]\n\t"
        "ldrd	r4, r5, [%[sha512], #8]\n\t"
        "ldrd	r6, r7, [sp, #128]\n\t"
        "ldrd	r8, r9, [sp, #136]\n\t"
        "adds	r12, r12, r6\n\t"
        "adc	lr, lr, r7\n\t"
        "adds	r4, r4, r8\n\t"
        "adc	r5, r5, r9\n\t"
        "strd	r12, lr, [%[sha512]]\n\t"
        "strd	r4, r5, [%[sha512], #8]\n\t"
        "strd	r12, lr, [sp, #128]\n\t"
        "strd	r4, r5, [sp, #136]\n\t"
        "ldrd	r12, lr, [%[sha512], #16]\n\t"
        "ldrd	r4, r5, [%[sha512], #24]\n\t"
        "ldrd	r6, r7, [sp, #144]\n\t"
        "ldrd	r8, r9, [sp, #152]\n\t"
        "adds	r12, r12, r6\n\t"
        "adc	lr, lr, r7\n\t"
        "adds	r4, r4, r8\n\t"
        "adc	r5, r5, r9\n\t"
        "strd	r12, lr, [%[sha512], #16]\n\t"
        "strd	r4, r5, [%[sha512], #24]\n\t"
        "strd	r12, lr, [sp, #144]\n\t"
        "strd	r4, r5, [sp, #152]\n\t"
        "ldrd	r12, lr, [%[sha512], #32]\n\t"
        "ldrd	r4, r5, [%[sha512], #40]\n\t"
        "ldrd	r6, r7, [sp, #160]\n\t"
        "ldrd	r8, r9, [sp, #168]\n\t"
        "adds	r12, r12, r6\n\t"
        "adc	lr, lr, r7\n\t"
        "adds	r4, r4, r8\n\t"
        "adc	r5, r5, r9\n\t"
        "strd	r12, lr, [%[sha512], #32]\n\t"
        "strd	r4, r5, [%[sha512], #40]\n\t"
        "strd	r12, lr, [sp, #160]\n\t"
        "strd	r4, r5, [sp, #168]\n\t"
        "ldrd	r12, lr, [%[sha512], #48]\n\t"
        "ldrd	r4, r5, [%[sha512], #56]\n\t"
        "ldrd	r6, r7, [sp, #176]\n\t"
        "ldrd	r8, r9, [sp, #184]\n\t"
        "adds	r12, r12, r6\n\t"
        "adc	lr, lr, r7\n\t"
        "adds	r4, r4, r8\n\t"
        "adc	r5, r5, r9\n\t"
        "strd	r12, lr, [%[sha512], #48]\n\t"
        "strd	r4, r5, [%[sha512], #56]\n\t"
        "strd	r12, lr, [sp, #176]\n\t"
        "strd	r4, r5, [sp, #184]\n\t"
        "subs	%[len], %[len], #0x80\n\t"
        "sub	r3, r3, #0x200\n\t"
        "add	%[data], %[data], #0x80\n\t"
        "bne	L_sha512_len_neon_begin_%=\n\t"
        "eor	r0, r0, r0\n\t"
        "add	sp, sp, #0xc0\n\t"
        : [sha512] "+r" (sha512), [data] "+r" (data), [len] "+r" (len)
        : [L_SHA512_transform_len_k] "r" (L_SHA512_transform_len_k)
        : "memory", "r3", "r12", "lr", "r4", "r5", "r6", "r7", "r8", "r9", "r10"
    );
}

#endif /* WOLFSSL_ARMASM_NO_NEON */
#include <wolfssl/wolfcrypt/sha512.h>

#ifndef WOLFSSL_ARMASM_NO_NEON
static const uint64_t L_SHA512_transform_neon_len_k[] = {
    0x428a2f98d728ae22UL,
    0x7137449123ef65cdUL,
    0xb5c0fbcfec4d3b2fUL,
    0xe9b5dba58189dbbcUL,
    0x3956c25bf348b538UL,
    0x59f111f1b605d019UL,
    0x923f82a4af194f9bUL,
    0xab1c5ed5da6d8118UL,
    0xd807aa98a3030242UL,
    0x12835b0145706fbeUL,
    0x243185be4ee4b28cUL,
    0x550c7dc3d5ffb4e2UL,
    0x72be5d74f27b896fUL,
    0x80deb1fe3b1696b1UL,
    0x9bdc06a725c71235UL,
    0xc19bf174cf692694UL,
    0xe49b69c19ef14ad2UL,
    0xefbe4786384f25e3UL,
    0xfc19dc68b8cd5b5UL,
    0x240ca1cc77ac9c65UL,
    0x2de92c6f592b0275UL,
    0x4a7484aa6ea6e483UL,
    0x5cb0a9dcbd41fbd4UL,
    0x76f988da831153b5UL,
    0x983e5152ee66dfabUL,
    0xa831c66d2db43210UL,
    0xb00327c898fb213fUL,
    0xbf597fc7beef0ee4UL,
    0xc6e00bf33da88fc2UL,
    0xd5a79147930aa725UL,
    0x6ca6351e003826fUL,
    0x142929670a0e6e70UL,
    0x27b70a8546d22ffcUL,
    0x2e1b21385c26c926UL,
    0x4d2c6dfc5ac42aedUL,
    0x53380d139d95b3dfUL,
    0x650a73548baf63deUL,
    0x766a0abb3c77b2a8UL,
    0x81c2c92e47edaee6UL,
    0x92722c851482353bUL,
    0xa2bfe8a14cf10364UL,
    0xa81a664bbc423001UL,
    0xc24b8b70d0f89791UL,
    0xc76c51a30654be30UL,
    0xd192e819d6ef5218UL,
    0xd69906245565a910UL,
    0xf40e35855771202aUL,
    0x106aa07032bbd1b8UL,
    0x19a4c116b8d2d0c8UL,
    0x1e376c085141ab53UL,
    0x2748774cdf8eeb99UL,
    0x34b0bcb5e19b48a8UL,
    0x391c0cb3c5c95a63UL,
    0x4ed8aa4ae3418acbUL,
    0x5b9cca4f7763e373UL,
    0x682e6ff3d6b2b8a3UL,
    0x748f82ee5defb2fcUL,
    0x78a5636f43172f60UL,
    0x84c87814a1f0ab72UL,
    0x8cc702081a6439ecUL,
    0x90befffa23631e28UL,
    0xa4506cebde82bde9UL,
    0xbef9a3f7b2c67915UL,
    0xc67178f2e372532bUL,
    0xca273eceea26619cUL,
    0xd186b8c721c0c207UL,
    0xeada7dd6cde0eb1eUL,
    0xf57d4f7fee6ed178UL,
    0x6f067aa72176fbaUL,
    0xa637dc5a2c898a6UL,
    0x113f9804bef90daeUL,
    0x1b710b35131c471bUL,
    0x28db77f523047d84UL,
    0x32caab7b40c72493UL,
    0x3c9ebe0a15c9bebcUL,
    0x431d67c49c100d4cUL,
    0x4cc5d4becb3e42b6UL,
    0x597f299cfc657e2aUL,
    0x5fcb6fab3ad6faecUL,
    0x6c44198c4a475817UL,
};

void Transform_Sha512_Len(wc_Sha512* sha512, const byte* data, word32 len)
{
    __asm__ __volatile__ (
        /* Load digest into working vars */
        "vldm.64	%[sha512], {d0-d7}\n\t"
        /* Start of loop processing a block */
        "\n"
    "L_sha512_len_neon_begin_%=: \n\t"
        /* Load W */
        "vldm.64	%[data]!, {d16-d31}\n\t"
        "vrev64.8	q8, q8\n\t"
        "vrev64.8	q9, q9\n\t"
        "vrev64.8	q10, q10\n\t"
        "vrev64.8	q11, q11\n\t"
        "vrev64.8	q12, q12\n\t"
        "vrev64.8	q13, q13\n\t"
        "vrev64.8	q14, q14\n\t"
        "vrev64.8	q15, q15\n\t"
        "mov	r3, %[L_SHA512_transform_neon_len_k]\n\t"
        "mov	r12, #4\n\t"
        /* Start of 16 rounds */
        "\n"
    "L_sha512_len_neon_start_%=: \n\t"
        /* Round 0 */
        "vld1.64	{d12}, [r3]!\n\t"
        "vshl.u64	d8, d4, #50\n\t"
        "vsri.u64	d8, d4, #14\n\t"
        "vshl.u64	d9, d0, #36\n\t"
        "vsri.u64	d9, d0, #28\n\t"
        "vshl.u64	d10, d4, #46\n\t"
        "vsri.u64	d10, d4, #18\n\t"
        "vshl.u64	d11, d0, #30\n\t"
        "vsri.u64	d11, d0, #34\n\t"
        "veor	d8, d10\n\t"
        "veor	d9, d11\n\t"
        "vshl.u64	d10, d4, #23\n\t"
        "vsri.u64	d10, d4, #41\n\t"
        "vshl.u64	d11, d0, #25\n\t"
        "vsri.u64	d11, d0, #39\n\t"
        "veor	d8, d10\n\t"
        "veor	d9, d11\n\t"
        "vadd.i64	d7, d8\n\t"
        "vadd.i64	d12, d16\n\t"
        "vmov	d8, d4\n\t"
        "veor	d10, d1, d2\n\t"
        "vadd.i64	d7, d12\n\t"
        "vbsl	d8, d5, d6\n\t"
        "vbsl	d10, d0, d2\n\t"
        "vadd.i64	d7, d8\n\t"
        "vadd.i64	d10, d9\n\t"
        "vadd.i64	d3, d7\n\t"
        "vadd.i64	d7, d10\n\t"
        /* Round 1 */
        "vld1.64	{d12}, [r3]!\n\t"
        "vshl.u64	d8, d3, #50\n\t"
        "vsri.u64	d8, d3, #14\n\t"
        "vshl.u64	d9, d7, #36\n\t"
        "vsri.u64	d9, d7, #28\n\t"
        "vshl.u64	d10, d3, #46\n\t"
        "vsri.u64	d10, d3, #18\n\t"
        "vshl.u64	d11, d7, #30\n\t"
        "vsri.u64	d11, d7, #34\n\t"
        "veor	d8, d10\n\t"
        "veor	d9, d11\n\t"
        "vshl.u64	d10, d3, #23\n\t"
        "vsri.u64	d10, d3, #41\n\t"
        "vshl.u64	d11, d7, #25\n\t"
        "vsri.u64	d11, d7, #39\n\t"
        "veor	d8, d10\n\t"
        "veor	d9, d11\n\t"
        "vadd.i64	d6, d8\n\t"
        "vadd.i64	d12, d17\n\t"
        "vmov	d8, d3\n\t"
        "veor	d10, d0, d1\n\t"
        "vadd.i64	d6, d12\n\t"
        "vbsl	d8, d4, d5\n\t"
        "vbsl	d10, d7, d1\n\t"
        "vadd.i64	d6, d8\n\t"
        "vadd.i64	d10, d9\n\t"
        "vadd.i64	d2, d6\n\t"
        "vadd.i64	d6, d10\n\t"
        /* Calc new W[0]-W[1] */
        "vext.8	q6, q8, q9, #8\n\t"
        "vshl.u64	q4, q15, #45\n\t"
        "vsri.u64	q4, q15, #19\n\t"
        "vshl.u64	q5, q15, #3\n\t"
        "vsri.u64	q5, q15, #61\n\t"
        "veor	q5, q4\n\t"
        "vshr.u64	q4, q15, #6\n\t"
        "veor	q5, q4\n\t"
        "vadd.i64	q8, q5\n\t"
        "vext.8	q7, q12, q13, #8\n\t"
        "vadd.i64	q8, q7\n\t"
        "vshl.u64	q4, q6, #63\n\t"
        "vsri.u64	q4, q6, #1\n\t"
        "vshl.u64	q5, q6, #56\n\t"
        "vsri.u64	q5, q6, #8\n\t"
        "veor	q5, q4\n\t"
        "vshr.u64	q6, #7\n\t"
        "veor	q5, q6\n\t"
        "vadd.i64	q8, q5\n\t"
        /* Round 2 */
        "vld1.64	{d12}, [r3]!\n\t"
        "vshl.u64	d8, d2, #50\n\t"
        "vsri.u64	d8, d2, #14\n\t"
        "vshl.u64	d9, d6, #36\n\t"
        "vsri.u64	d9, d6, #28\n\t"
        "vshl.u64	d10, d2, #46\n\t"
        "vsri.u64	d10, d2, #18\n\t"
        "vshl.u64	d11, d6, #30\n\t"
        "vsri.u64	d11, d6, #34\n\t"
        "veor	d8, d10\n\t"
        "veor	d9, d11\n\t"
        "vshl.u64	d10, d2, #23\n\t"
        "vsri.u64	d10, d2, #41\n\t"
        "vshl.u64	d11, d6, #25\n\t"
        "vsri.u64	d11, d6, #39\n\t"
        "veor	d8, d10\n\t"
        "veor	d9, d11\n\t"
        "vadd.i64	d5, d8\n\t"
        "vadd.i64	d12, d18\n\t"
        "vmov	d8, d2\n\t"
        "veor	d10, d7, d0\n\t"
        "vadd.i64	d5, d12\n\t"
        "vbsl	d8, d3, d4\n\t"
        "vbsl	d10, d6, d0\n\t"
        "vadd.i64	d5, d8\n\t"
        "vadd.i64	d10, d9\n\t"
        "vadd.i64	d1, d5\n\t"
        "vadd.i64	d5, d10\n\t"
        /* Round 3 */
        "vld1.64	{d12}, [r3]!\n\t"
        "vshl.u64	d8, d1, #50\n\t"
        "vsri.u64	d8, d1, #14\n\t"
        "vshl.u64	d9, d5, #36\n\t"
        "vsri.u64	d9, d5, #28\n\t"
        "vshl.u64	d10, d1, #46\n\t"
        "vsri.u64	d10, d1, #18\n\t"
        "vshl.u64	d11, d5, #30\n\t"
        "vsri.u64	d11, d5, #34\n\t"
        "veor	d8, d10\n\t"
        "veor	d9, d11\n\t"
        "vshl.u64	d10, d1, #23\n\t"
        "vsri.u64	d10, d1, #41\n\t"
        "vshl.u64	d11, d5, #25\n\t"
        "vsri.u64	d11, d5, #39\n\t"
        "veor	d8, d10\n\t"
        "veor	d9, d11\n\t"
        "vadd.i64	d4, d8\n\t"
        "vadd.i64	d12, d19\n\t"
        "vmov	d8, d1\n\t"
        "veor	d10, d6, d7\n\t"
        "vadd.i64	d4, d12\n\t"
        "vbsl	d8, d2, d3\n\t"
        "vbsl	d10, d5, d7\n\t"
        "vadd.i64	d4, d8\n\t"
        "vadd.i64	d10, d9\n\t"
        "vadd.i64	d0, d4\n\t"
        "vadd.i64	d4, d10\n\t"
        /* Calc new W[2]-W[3] */
        "vext.8	q6, q9, q10, #8\n\t"
        "vshl.u64	q4, q8, #45\n\t"
        "vsri.u64	q4, q8, #19\n\t"
        "vshl.u64	q5, q8, #3\n\t"
        "vsri.u64	q5, q8, #61\n\t"
        "veor	q5, q4\n\t"
        "vshr.u64	q4, q8, #6\n\t"
        "veor	q5, q4\n\t"
        "vadd.i64	q9, q5\n\t"
        "vext.8	q7, q13, q14, #8\n\t"
        "vadd.i64	q9, q7\n\t"
        "vshl.u64	q4, q6, #63\n\t"
        "vsri.u64	q4, q6, #1\n\t"
        "vshl.u64	q5, q6, #56\n\t"
        "vsri.u64	q5, q6, #8\n\t"
        "veor	q5, q4\n\t"
        "vshr.u64	q6, #7\n\t"
        "veor	q5, q6\n\t"
        "vadd.i64	q9, q5\n\t"
        /* Round 4 */
        "vld1.64	{d12}, [r3]!\n\t"
        "vshl.u64	d8, d0, #50\n\t"
        "vsri.u64	d8, d0, #14\n\t"
        "vshl.u64	d9, d4, #36\n\t"
        "vsri.u64	d9, d4, #28\n\t"
        "vshl.u64	d10, d0, #46\n\t"
        "vsri.u64	d10, d0, #18\n\t"
        "vshl.u64	d11, d4, #30\n\t"
        "vsri.u64	d11, d4, #34\n\t"
        "veor	d8, d10\n\t"
        "veor	d9, d11\n\t"
        "vshl.u64	d10, d0, #23\n\t"
        "vsri.u64	d10, d0, #41\n\t"
        "vshl.u64	d11, d4, #25\n\t"
        "vsri.u64	d11, d4, #39\n\t"
        "veor	d8, d10\n\t"
        "veor	d9, d11\n\t"
        "vadd.i64	d3, d8\n\t"
        "vadd.i64	d12, d20\n\t"
        "vmov	d8, d0\n\t"
        "veor	d10, d5, d6\n\t"
        "vadd.i64	d3, d12\n\t"
        "vbsl	d8, d1, d2\n\t"
        "vbsl	d10, d4, d6\n\t"
        "vadd.i64	d3, d8\n\t"
        "vadd.i64	d10, d9\n\t"
        "vadd.i64	d7, d3\n\t"
        "vadd.i64	d3, d10\n\t"
        /* Round 5 */
        "vld1.64	{d12}, [r3]!\n\t"
        "vshl.u64	d8, d7, #50\n\t"
        "vsri.u64	d8, d7, #14\n\t"
        "vshl.u64	d9, d3, #36\n\t"
        "vsri.u64	d9, d3, #28\n\t"
        "vshl.u64	d10, d7, #46\n\t"
        "vsri.u64	d10, d7, #18\n\t"
        "vshl.u64	d11, d3, #30\n\t"
        "vsri.u64	d11, d3, #34\n\t"
        "veor	d8, d10\n\t"
        "veor	d9, d11\n\t"
        "vshl.u64	d10, d7, #23\n\t"
        "vsri.u64	d10, d7, #41\n\t"
        "vshl.u64	d11, d3, #25\n\t"
        "vsri.u64	d11, d3, #39\n\t"
        "veor	d8, d10\n\t"
        "veor	d9, d11\n\t"
        "vadd.i64	d2, d8\n\t"
        "vadd.i64	d12, d21\n\t"
        "vmov	d8, d7\n\t"
        "veor	d10, d4, d5\n\t"
        "vadd.i64	d2, d12\n\t"
        "vbsl	d8, d0, d1\n\t"
        "vbsl	d10, d3, d5\n\t"
        "vadd.i64	d2, d8\n\t"
        "vadd.i64	d10, d9\n\t"
        "vadd.i64	d6, d2\n\t"
        "vadd.i64	d2, d10\n\t"
        /* Calc new W[4]-W[5] */
        "vext.8	q6, q10, q11, #8\n\t"
        "vshl.u64	q4, q9, #45\n\t"
        "vsri.u64	q4, q9, #19\n\t"
        "vshl.u64	q5, q9, #3\n\t"
        "vsri.u64	q5, q9, #61\n\t"
        "veor	q5, q4\n\t"
        "vshr.u64	q4, q9, #6\n\t"
        "veor	q5, q4\n\t"
        "vadd.i64	q10, q5\n\t"
        "vext.8	q7, q14, q15, #8\n\t"
        "vadd.i64	q10, q7\n\t"
        "vshl.u64	q4, q6, #63\n\t"
        "vsri.u64	q4, q6, #1\n\t"
        "vshl.u64	q5, q6, #56\n\t"
        "vsri.u64	q5, q6, #8\n\t"
        "veor	q5, q4\n\t"
        "vshr.u64	q6, #7\n\t"
        "veor	q5, q6\n\t"
        "vadd.i64	q10, q5\n\t"
        /* Round 6 */
        "vld1.64	{d12}, [r3]!\n\t"
        "vshl.u64	d8, d6, #50\n\t"
        "vsri.u64	d8, d6, #14\n\t"
        "vshl.u64	d9, d2, #36\n\t"
        "vsri.u64	d9, d2, #28\n\t"
        "vshl.u64	d10, d6, #46\n\t"
        "vsri.u64	d10, d6, #18\n\t"
        "vshl.u64	d11, d2, #30\n\t"
        "vsri.u64	d11, d2, #34\n\t"
        "veor	d8, d10\n\t"
        "veor	d9, d11\n\t"
        "vshl.u64	d10, d6, #23\n\t"
        "vsri.u64	d10, d6, #41\n\t"
        "vshl.u64	d11, d2, #25\n\t"
        "vsri.u64	d11, d2, #39\n\t"
        "veor	d8, d10\n\t"
        "veor	d9, d11\n\t"
        "vadd.i64	d1, d8\n\t"
        "vadd.i64	d12, d22\n\t"
        "vmov	d8, d6\n\t"
        "veor	d10, d3, d4\n\t"
        "vadd.i64	d1, d12\n\t"
        "vbsl	d8, d7, d0\n\t"
        "vbsl	d10, d2, d4\n\t"
        "vadd.i64	d1, d8\n\t"
        "vadd.i64	d10, d9\n\t"
        "vadd.i64	d5, d1\n\t"
        "vadd.i64	d1, d10\n\t"
        /* Round 7 */
        "vld1.64	{d12}, [r3]!\n\t"
        "vshl.u64	d8, d5, #50\n\t"
        "vsri.u64	d8, d5, #14\n\t"
        "vshl.u64	d9, d1, #36\n\t"
        "vsri.u64	d9, d1, #28\n\t"
        "vshl.u64	d10, d5, #46\n\t"
        "vsri.u64	d10, d5, #18\n\t"
        "vshl.u64	d11, d1, #30\n\t"
        "vsri.u64	d11, d1, #34\n\t"
        "veor	d8, d10\n\t"
        "veor	d9, d11\n\t"
        "vshl.u64	d10, d5, #23\n\t"
        "vsri.u64	d10, d5, #41\n\t"
        "vshl.u64	d11, d1, #25\n\t"
        "vsri.u64	d11, d1, #39\n\t"
        "veor	d8, d10\n\t"
        "veor	d9, d11\n\t"
        "vadd.i64	d0, d8\n\t"
        "vadd.i64	d12, d23\n\t"
        "vmov	d8, d5\n\t"
        "veor	d10, d2, d3\n\t"
        "vadd.i64	d0, d12\n\t"
        "vbsl	d8, d6, d7\n\t"
        "vbsl	d10, d1, d3\n\t"
        "vadd.i64	d0, d8\n\t"
        "vadd.i64	d10, d9\n\t"
        "vadd.i64	d4, d0\n\t"
        "vadd.i64	d0, d10\n\t"
        /* Calc new W[6]-W[7] */
        "vext.8	q6, q11, q12, #8\n\t"
        "vshl.u64	q4, q10, #45\n\t"
        "vsri.u64	q4, q10, #19\n\t"
        "vshl.u64	q5, q10, #3\n\t"
        "vsri.u64	q5, q10, #61\n\t"
        "veor	q5, q4\n\t"
        "vshr.u64	q4, q10, #6\n\t"
        "veor	q5, q4\n\t"
        "vadd.i64	q11, q5\n\t"
        "vext.8	q7, q15, q8, #8\n\t"
        "vadd.i64	q11, q7\n\t"
        "vshl.u64	q4, q6, #63\n\t"
        "vsri.u64	q4, q6, #1\n\t"
        "vshl.u64	q5, q6, #56\n\t"
        "vsri.u64	q5, q6, #8\n\t"
        "veor	q5, q4\n\t"
        "vshr.u64	q6, #7\n\t"
        "veor	q5, q6\n\t"
        "vadd.i64	q11, q5\n\t"
        /* Round 8 */
        "vld1.64	{d12}, [r3]!\n\t"
        "vshl.u64	d8, d4, #50\n\t"
        "vsri.u64	d8, d4, #14\n\t"
        "vshl.u64	d9, d0, #36\n\t"
        "vsri.u64	d9, d0, #28\n\t"
        "vshl.u64	d10, d4, #46\n\t"
        "vsri.u64	d10, d4, #18\n\t"
        "vshl.u64	d11, d0, #30\n\t"
        "vsri.u64	d11, d0, #34\n\t"
        "veor	d8, d10\n\t"
        "veor	d9, d11\n\t"
        "vshl.u64	d10, d4, #23\n\t"
        "vsri.u64	d10, d4, #41\n\t"
        "vshl.u64	d11, d0, #25\n\t"
        "vsri.u64	d11, d0, #39\n\t"
        "veor	d8, d10\n\t"
        "veor	d9, d11\n\t"
        "vadd.i64	d7, d8\n\t"
        "vadd.i64	d12, d24\n\t"
        "vmov	d8, d4\n\t"
        "veor	d10, d1, d2\n\t"
        "vadd.i64	d7, d12\n\t"
        "vbsl	d8, d5, d6\n\t"
        "vbsl	d10, d0, d2\n\t"
        "vadd.i64	d7, d8\n\t"
        "vadd.i64	d10, d9\n\t"
        "vadd.i64	d3, d7\n\t"
        "vadd.i64	d7, d10\n\t"
        /* Round 9 */
        "vld1.64	{d12}, [r3]!\n\t"
        "vshl.u64	d8, d3, #50\n\t"
        "vsri.u64	d8, d3, #14\n\t"
        "vshl.u64	d9, d7, #36\n\t"
        "vsri.u64	d9, d7, #28\n\t"
        "vshl.u64	d10, d3, #46\n\t"
        "vsri.u64	d10, d3, #18\n\t"
        "vshl.u64	d11, d7, #30\n\t"
        "vsri.u64	d11, d7, #34\n\t"
        "veor	d8, d10\n\t"
        "veor	d9, d11\n\t"
        "vshl.u64	d10, d3, #23\n\t"
        "vsri.u64	d10, d3, #41\n\t"
        "vshl.u64	d11, d7, #25\n\t"
        "vsri.u64	d11, d7, #39\n\t"
        "veor	d8, d10\n\t"
        "veor	d9, d11\n\t"
        "vadd.i64	d6, d8\n\t"
        "vadd.i64	d12, d25\n\t"
        "vmov	d8, d3\n\t"
        "veor	d10, d0, d1\n\t"
        "vadd.i64	d6, d12\n\t"
        "vbsl	d8, d4, d5\n\t"
        "vbsl	d10, d7, d1\n\t"
        "vadd.i64	d6, d8\n\t"
        "vadd.i64	d10, d9\n\t"
        "vadd.i64	d2, d6\n\t"
        "vadd.i64	d6, d10\n\t"
        /* Calc new W[8]-W[9] */
        "vext.8	q6, q12, q13, #8\n\t"
        "vshl.u64	q4, q11, #45\n\t"
        "vsri.u64	q4, q11, #19\n\t"
        "vshl.u64	q5, q11, #3\n\t"
        "vsri.u64	q5, q11, #61\n\t"
        "veor	q5, q4\n\t"
        "vshr.u64	q4, q11, #6\n\t"
        "veor	q5, q4\n\t"
        "vadd.i64	q12, q5\n\t"
        "vext.8	q7, q8, q9, #8\n\t"
        "vadd.i64	q12, q7\n\t"
        "vshl.u64	q4, q6, #63\n\t"
        "vsri.u64	q4, q6, #1\n\t"
        "vshl.u64	q5, q6, #56\n\t"
        "vsri.u64	q5, q6, #8\n\t"
        "veor	q5, q4\n\t"
        "vshr.u64	q6, #7\n\t"
        "veor	q5, q6\n\t"
        "vadd.i64	q12, q5\n\t"
        /* Round 10 */
        "vld1.64	{d12}, [r3]!\n\t"
        "vshl.u64	d8, d2, #50\n\t"
        "vsri.u64	d8, d2, #14\n\t"
        "vshl.u64	d9, d6, #36\n\t"
        "vsri.u64	d9, d6, #28\n\t"
        "vshl.u64	d10, d2, #46\n\t"
        "vsri.u64	d10, d2, #18\n\t"
        "vshl.u64	d11, d6, #30\n\t"
        "vsri.u64	d11, d6, #34\n\t"
        "veor	d8, d10\n\t"
        "veor	d9, d11\n\t"
        "vshl.u64	d10, d2, #23\n\t"
        "vsri.u64	d10, d2, #41\n\t"
        "vshl.u64	d11, d6, #25\n\t"
        "vsri.u64	d11, d6, #39\n\t"
        "veor	d8, d10\n\t"
        "veor	d9, d11\n\t"
        "vadd.i64	d5, d8\n\t"
        "vadd.i64	d12, d26\n\t"
        "vmov	d8, d2\n\t"
        "veor	d10, d7, d0\n\t"
        "vadd.i64	d5, d12\n\t"
        "vbsl	d8, d3, d4\n\t"
        "vbsl	d10, d6, d0\n\t"
        "vadd.i64	d5, d8\n\t"
        "vadd.i64	d10, d9\n\t"
        "vadd.i64	d1, d5\n\t"
        "vadd.i64	d5, d10\n\t"
        /* Round 11 */
        "vld1.64	{d12}, [r3]!\n\t"
        "vshl.u64	d8, d1, #50\n\t"
        "vsri.u64	d8, d1, #14\n\t"
        "vshl.u64	d9, d5, #36\n\t"
        "vsri.u64	d9, d5, #28\n\t"
        "vshl.u64	d10, d1, #46\n\t"
        "vsri.u64	d10, d1, #18\n\t"
        "vshl.u64	d11, d5, #30\n\t"
        "vsri.u64	d11, d5, #34\n\t"
        "veor	d8, d10\n\t"
        "veor	d9, d11\n\t"
        "vshl.u64	d10, d1, #23\n\t"
        "vsri.u64	d10, d1, #41\n\t"
        "vshl.u64	d11, d5, #25\n\t"
        "vsri.u64	d11, d5, #39\n\t"
        "veor	d8, d10\n\t"
        "veor	d9, d11\n\t"
        "vadd.i64	d4, d8\n\t"
        "vadd.i64	d12, d27\n\t"
        "vmov	d8, d1\n\t"
        "veor	d10, d6, d7\n\t"
        "vadd.i64	d4, d12\n\t"
        "vbsl	d8, d2, d3\n\t"
        "vbsl	d10, d5, d7\n\t"
        "vadd.i64	d4, d8\n\t"
        "vadd.i64	d10, d9\n\t"
        "vadd.i64	d0, d4\n\t"
        "vadd.i64	d4, d10\n\t"
        /* Calc new W[10]-W[11] */
        "vext.8	q6, q13, q14, #8\n\t"
        "vshl.u64	q4, q12, #45\n\t"
        "vsri.u64	q4, q12, #19\n\t"
        "vshl.u64	q5, q12, #3\n\t"
        "vsri.u64	q5, q12, #61\n\t"
        "veor	q5, q4\n\t"
        "vshr.u64	q4, q12, #6\n\t"
        "veor	q5, q4\n\t"
        "vadd.i64	q13, q5\n\t"
        "vext.8	q7, q9, q10, #8\n\t"
        "vadd.i64	q13, q7\n\t"
        "vshl.u64	q4, q6, #63\n\t"
        "vsri.u64	q4, q6, #1\n\t"
        "vshl.u64	q5, q6, #56\n\t"
        "vsri.u64	q5, q6, #8\n\t"
        "veor	q5, q4\n\t"
        "vshr.u64	q6, #7\n\t"
        "veor	q5, q6\n\t"
        "vadd.i64	q13, q5\n\t"
        /* Round 12 */
        "vld1.64	{d12}, [r3]!\n\t"
        "vshl.u64	d8, d0, #50\n\t"
        "vsri.u64	d8, d0, #14\n\t"
        "vshl.u64	d9, d4, #36\n\t"
        "vsri.u64	d9, d4, #28\n\t"
        "vshl.u64	d10, d0, #46\n\t"
        "vsri.u64	d10, d0, #18\n\t"
        "vshl.u64	d11, d4, #30\n\t"
        "vsri.u64	d11, d4, #34\n\t"
        "veor	d8, d10\n\t"
        "veor	d9, d11\n\t"
        "vshl.u64	d10, d0, #23\n\t"
        "vsri.u64	d10, d0, #41\n\t"
        "vshl.u64	d11, d4, #25\n\t"
        "vsri.u64	d11, d4, #39\n\t"
        "veor	d8, d10\n\t"
        "veor	d9, d11\n\t"
        "vadd.i64	d3, d8\n\t"
        "vadd.i64	d12, d28\n\t"
        "vmov	d8, d0\n\t"
        "veor	d10, d5, d6\n\t"
        "vadd.i64	d3, d12\n\t"
        "vbsl	d8, d1, d2\n\t"
        "vbsl	d10, d4, d6\n\t"
        "vadd.i64	d3, d8\n\t"
        "vadd.i64	d10, d9\n\t"
        "vadd.i64	d7, d3\n\t"
        "vadd.i64	d3, d10\n\t"
        /* Round 13 */
        "vld1.64	{d12}, [r3]!\n\t"
        "vshl.u64	d8, d7, #50\n\t"
        "vsri.u64	d8, d7, #14\n\t"
        "vshl.u64	d9, d3, #36\n\t"
        "vsri.u64	d9, d3, #28\n\t"
        "vshl.u64	d10, d7, #46\n\t"
        "vsri.u64	d10, d7, #18\n\t"
        "vshl.u64	d11, d3, #30\n\t"
        "vsri.u64	d11, d3, #34\n\t"
        "veor	d8, d10\n\t"
        "veor	d9, d11\n\t"
        "vshl.u64	d10, d7, #23\n\t"
        "vsri.u64	d10, d7, #41\n\t"
        "vshl.u64	d11, d3, #25\n\t"
        "vsri.u64	d11, d3, #39\n\t"
        "veor	d8, d10\n\t"
        "veor	d9, d11\n\t"
        "vadd.i64	d2, d8\n\t"
        "vadd.i64	d12, d29\n\t"
        "vmov	d8, d7\n\t"
        "veor	d10, d4, d5\n\t"
        "vadd.i64	d2, d12\n\t"
        "vbsl	d8, d0, d1\n\t"
        "vbsl	d10, d3, d5\n\t"
        "vadd.i64	d2, d8\n\t"
        "vadd.i64	d10, d9\n\t"
        "vadd.i64	d6, d2\n\t"
        "vadd.i64	d2, d10\n\t"
        /* Calc new W[12]-W[13] */
        "vext.8	q6, q14, q15, #8\n\t"
        "vshl.u64	q4, q13, #45\n\t"
        "vsri.u64	q4, q13, #19\n\t"
        "vshl.u64	q5, q13, #3\n\t"
        "vsri.u64	q5, q13, #61\n\t"
        "veor	q5, q4\n\t"
        "vshr.u64	q4, q13, #6\n\t"
        "veor	q5, q4\n\t"
        "vadd.i64	q14, q5\n\t"
        "vext.8	q7, q10, q11, #8\n\t"
        "vadd.i64	q14, q7\n\t"
        "vshl.u64	q4, q6, #63\n\t"
        "vsri.u64	q4, q6, #1\n\t"
        "vshl.u64	q5, q6, #56\n\t"
        "vsri.u64	q5, q6, #8\n\t"
        "veor	q5, q4\n\t"
        "vshr.u64	q6, #7\n\t"
        "veor	q5, q6\n\t"
        "vadd.i64	q14, q5\n\t"
        /* Round 14 */
        "vld1.64	{d12}, [r3]!\n\t"
        "vshl.u64	d8, d6, #50\n\t"
        "vsri.u64	d8, d6, #14\n\t"
        "vshl.u64	d9, d2, #36\n\t"
        "vsri.u64	d9, d2, #28\n\t"
        "vshl.u64	d10, d6, #46\n\t"
        "vsri.u64	d10, d6, #18\n\t"
        "vshl.u64	d11, d2, #30\n\t"
        "vsri.u64	d11, d2, #34\n\t"
        "veor	d8, d10\n\t"
        "veor	d9, d11\n\t"
        "vshl.u64	d10, d6, #23\n\t"
        "vsri.u64	d10, d6, #41\n\t"
        "vshl.u64	d11, d2, #25\n\t"
        "vsri.u64	d11, d2, #39\n\t"
        "veor	d8, d10\n\t"
        "veor	d9, d11\n\t"
        "vadd.i64	d1, d8\n\t"
        "vadd.i64	d12, d30\n\t"
        "vmov	d8, d6\n\t"
        "veor	d10, d3, d4\n\t"
        "vadd.i64	d1, d12\n\t"
        "vbsl	d8, d7, d0\n\t"
        "vbsl	d10, d2, d4\n\t"
        "vadd.i64	d1, d8\n\t"
        "vadd.i64	d10, d9\n\t"
        "vadd.i64	d5, d1\n\t"
        "vadd.i64	d1, d10\n\t"
        /* Round 15 */
        "vld1.64	{d12}, [r3]!\n\t"
        "vshl.u64	d8, d5, #50\n\t"
        "vsri.u64	d8, d5, #14\n\t"
        "vshl.u64	d9, d1, #36\n\t"
        "vsri.u64	d9, d1, #28\n\t"
        "vshl.u64	d10, d5, #46\n\t"
        "vsri.u64	d10, d5, #18\n\t"
        "vshl.u64	d11, d1, #30\n\t"
        "vsri.u64	d11, d1, #34\n\t"
        "veor	d8, d10\n\t"
        "veor	d9, d11\n\t"
        "vshl.u64	d10, d5, #23\n\t"
        "vsri.u64	d10, d5, #41\n\t"
        "vshl.u64	d11, d1, #25\n\t"
        "vsri.u64	d11, d1, #39\n\t"
        "veor	d8, d10\n\t"
        "veor	d9, d11\n\t"
        "vadd.i64	d0, d8\n\t"
        "vadd.i64	d12, d31\n\t"
        "vmov	d8, d5\n\t"
        "veor	d10, d2, d3\n\t"
        "vadd.i64	d0, d12\n\t"
        "vbsl	d8, d6, d7\n\t"
        "vbsl	d10, d1, d3\n\t"
        "vadd.i64	d0, d8\n\t"
        "vadd.i64	d10, d9\n\t"
        "vadd.i64	d4, d0\n\t"
        "vadd.i64	d0, d10\n\t"
        /* Calc new W[14]-W[15] */
        "vext.8	q6, q15, q8, #8\n\t"
        "vshl.u64	q4, q14, #45\n\t"
        "vsri.u64	q4, q14, #19\n\t"
        "vshl.u64	q5, q14, #3\n\t"
        "vsri.u64	q5, q14, #61\n\t"
        "veor	q5, q4\n\t"
        "vshr.u64	q4, q14, #6\n\t"
        "veor	q5, q4\n\t"
        "vadd.i64	q15, q5\n\t"
        "vext.8	q7, q11, q12, #8\n\t"
        "vadd.i64	q15, q7\n\t"
        "vshl.u64	q4, q6, #63\n\t"
        "vsri.u64	q4, q6, #1\n\t"
        "vshl.u64	q5, q6, #56\n\t"
        "vsri.u64	q5, q6, #8\n\t"
        "veor	q5, q4\n\t"
        "vshr.u64	q6, #7\n\t"
        "veor	q5, q6\n\t"
        "vadd.i64	q15, q5\n\t"
        "subs	r12, r12, #1\n\t"
        "bne	L_sha512_len_neon_start_%=\n\t"
        /* Round 0 */
        "vld1.64	{d12}, [r3]!\n\t"
        "vshl.u64	d8, d4, #50\n\t"
        "vsri.u64	d8, d4, #14\n\t"
        "vshl.u64	d9, d0, #36\n\t"
        "vsri.u64	d9, d0, #28\n\t"
        "vshl.u64	d10, d4, #46\n\t"
        "vsri.u64	d10, d4, #18\n\t"
        "vshl.u64	d11, d0, #30\n\t"
        "vsri.u64	d11, d0, #34\n\t"
        "veor	d8, d10\n\t"
        "veor	d9, d11\n\t"
        "vshl.u64	d10, d4, #23\n\t"
        "vsri.u64	d10, d4, #41\n\t"
        "vshl.u64	d11, d0, #25\n\t"
        "vsri.u64	d11, d0, #39\n\t"
        "veor	d8, d10\n\t"
        "veor	d9, d11\n\t"
        "vadd.i64	d7, d8\n\t"
        "vadd.i64	d12, d16\n\t"
        "vmov	d8, d4\n\t"
        "veor	d10, d1, d2\n\t"
        "vadd.i64	d7, d12\n\t"
        "vbsl	d8, d5, d6\n\t"
        "vbsl	d10, d0, d2\n\t"
        "vadd.i64	d7, d8\n\t"
        "vadd.i64	d10, d9\n\t"
        "vadd.i64	d3, d7\n\t"
        "vadd.i64	d7, d10\n\t"
        /* Round 1 */
        "vld1.64	{d12}, [r3]!\n\t"
        "vshl.u64	d8, d3, #50\n\t"
        "vsri.u64	d8, d3, #14\n\t"
        "vshl.u64	d9, d7, #36\n\t"
        "vsri.u64	d9, d7, #28\n\t"
        "vshl.u64	d10, d3, #46\n\t"
        "vsri.u64	d10, d3, #18\n\t"
        "vshl.u64	d11, d7, #30\n\t"
        "vsri.u64	d11, d7, #34\n\t"
        "veor	d8, d10\n\t"
        "veor	d9, d11\n\t"
        "vshl.u64	d10, d3, #23\n\t"
        "vsri.u64	d10, d3, #41\n\t"
        "vshl.u64	d11, d7, #25\n\t"
        "vsri.u64	d11, d7, #39\n\t"
        "veor	d8, d10\n\t"
        "veor	d9, d11\n\t"
        "vadd.i64	d6, d8\n\t"
        "vadd.i64	d12, d17\n\t"
        "vmov	d8, d3\n\t"
        "veor	d10, d0, d1\n\t"
        "vadd.i64	d6, d12\n\t"
        "vbsl	d8, d4, d5\n\t"
        "vbsl	d10, d7, d1\n\t"
        "vadd.i64	d6, d8\n\t"
        "vadd.i64	d10, d9\n\t"
        "vadd.i64	d2, d6\n\t"
        "vadd.i64	d6, d10\n\t"
        /* Round 2 */
        "vld1.64	{d12}, [r3]!\n\t"
        "vshl.u64	d8, d2, #50\n\t"
        "vsri.u64	d8, d2, #14\n\t"
        "vshl.u64	d9, d6, #36\n\t"
        "vsri.u64	d9, d6, #28\n\t"
        "vshl.u64	d10, d2, #46\n\t"
        "vsri.u64	d10, d2, #18\n\t"
        "vshl.u64	d11, d6, #30\n\t"
        "vsri.u64	d11, d6, #34\n\t"
        "veor	d8, d10\n\t"
        "veor	d9, d11\n\t"
        "vshl.u64	d10, d2, #23\n\t"
        "vsri.u64	d10, d2, #41\n\t"
        "vshl.u64	d11, d6, #25\n\t"
        "vsri.u64	d11, d6, #39\n\t"
        "veor	d8, d10\n\t"
        "veor	d9, d11\n\t"
        "vadd.i64	d5, d8\n\t"
        "vadd.i64	d12, d18\n\t"
        "vmov	d8, d2\n\t"
        "veor	d10, d7, d0\n\t"
        "vadd.i64	d5, d12\n\t"
        "vbsl	d8, d3, d4\n\t"
        "vbsl	d10, d6, d0\n\t"
        "vadd.i64	d5, d8\n\t"
        "vadd.i64	d10, d9\n\t"
        "vadd.i64	d1, d5\n\t"
        "vadd.i64	d5, d10\n\t"
        /* Round 3 */
        "vld1.64	{d12}, [r3]!\n\t"
        "vshl.u64	d8, d1, #50\n\t"
        "vsri.u64	d8, d1, #14\n\t"
        "vshl.u64	d9, d5, #36\n\t"
        "vsri.u64	d9, d5, #28\n\t"
        "vshl.u64	d10, d1, #46\n\t"
        "vsri.u64	d10, d1, #18\n\t"
        "vshl.u64	d11, d5, #30\n\t"
        "vsri.u64	d11, d5, #34\n\t"
        "veor	d8, d10\n\t"
        "veor	d9, d11\n\t"
        "vshl.u64	d10, d1, #23\n\t"
        "vsri.u64	d10, d1, #41\n\t"
        "vshl.u64	d11, d5, #25\n\t"
        "vsri.u64	d11, d5, #39\n\t"
        "veor	d8, d10\n\t"
        "veor	d9, d11\n\t"
        "vadd.i64	d4, d8\n\t"
        "vadd.i64	d12, d19\n\t"
        "vmov	d8, d1\n\t"
        "veor	d10, d6, d7\n\t"
        "vadd.i64	d4, d12\n\t"
        "vbsl	d8, d2, d3\n\t"
        "vbsl	d10, d5, d7\n\t"
        "vadd.i64	d4, d8\n\t"
        "vadd.i64	d10, d9\n\t"
        "vadd.i64	d0, d4\n\t"
        "vadd.i64	d4, d10\n\t"
        /* Round 4 */
        "vld1.64	{d12}, [r3]!\n\t"
        "vshl.u64	d8, d0, #50\n\t"
        "vsri.u64	d8, d0, #14\n\t"
        "vshl.u64	d9, d4, #36\n\t"
        "vsri.u64	d9, d4, #28\n\t"
        "vshl.u64	d10, d0, #46\n\t"
        "vsri.u64	d10, d0, #18\n\t"
        "vshl.u64	d11, d4, #30\n\t"
        "vsri.u64	d11, d4, #34\n\t"
        "veor	d8, d10\n\t"
        "veor	d9, d11\n\t"
        "vshl.u64	d10, d0, #23\n\t"
        "vsri.u64	d10, d0, #41\n\t"
        "vshl.u64	d11, d4, #25\n\t"
        "vsri.u64	d11, d4, #39\n\t"
        "veor	d8, d10\n\t"
        "veor	d9, d11\n\t"
        "vadd.i64	d3, d8\n\t"
        "vadd.i64	d12, d20\n\t"
        "vmov	d8, d0\n\t"
        "veor	d10, d5, d6\n\t"
        "vadd.i64	d3, d12\n\t"
        "vbsl	d8, d1, d2\n\t"
        "vbsl	d10, d4, d6\n\t"
        "vadd.i64	d3, d8\n\t"
        "vadd.i64	d10, d9\n\t"
        "vadd.i64	d7, d3\n\t"
        "vadd.i64	d3, d10\n\t"
        /* Round 5 */
        "vld1.64	{d12}, [r3]!\n\t"
        "vshl.u64	d8, d7, #50\n\t"
        "vsri.u64	d8, d7, #14\n\t"
        "vshl.u64	d9, d3, #36\n\t"
        "vsri.u64	d9, d3, #28\n\t"
        "vshl.u64	d10, d7, #46\n\t"
        "vsri.u64	d10, d7, #18\n\t"
        "vshl.u64	d11, d3, #30\n\t"
        "vsri.u64	d11, d3, #34\n\t"
        "veor	d8, d10\n\t"
        "veor	d9, d11\n\t"
        "vshl.u64	d10, d7, #23\n\t"
        "vsri.u64	d10, d7, #41\n\t"
        "vshl.u64	d11, d3, #25\n\t"
        "vsri.u64	d11, d3, #39\n\t"
        "veor	d8, d10\n\t"
        "veor	d9, d11\n\t"
        "vadd.i64	d2, d8\n\t"
        "vadd.i64	d12, d21\n\t"
        "vmov	d8, d7\n\t"
        "veor	d10, d4, d5\n\t"
        "vadd.i64	d2, d12\n\t"
        "vbsl	d8, d0, d1\n\t"
        "vbsl	d10, d3, d5\n\t"
        "vadd.i64	d2, d8\n\t"
        "vadd.i64	d10, d9\n\t"
        "vadd.i64	d6, d2\n\t"
        "vadd.i64	d2, d10\n\t"
        /* Round 6 */
        "vld1.64	{d12}, [r3]!\n\t"
        "vshl.u64	d8, d6, #50\n\t"
        "vsri.u64	d8, d6, #14\n\t"
        "vshl.u64	d9, d2, #36\n\t"
        "vsri.u64	d9, d2, #28\n\t"
        "vshl.u64	d10, d6, #46\n\t"
        "vsri.u64	d10, d6, #18\n\t"
        "vshl.u64	d11, d2, #30\n\t"
        "vsri.u64	d11, d2, #34\n\t"
        "veor	d8, d10\n\t"
        "veor	d9, d11\n\t"
        "vshl.u64	d10, d6, #23\n\t"
        "vsri.u64	d10, d6, #41\n\t"
        "vshl.u64	d11, d2, #25\n\t"
        "vsri.u64	d11, d2, #39\n\t"
        "veor	d8, d10\n\t"
        "veor	d9, d11\n\t"
        "vadd.i64	d1, d8\n\t"
        "vadd.i64	d12, d22\n\t"
        "vmov	d8, d6\n\t"
        "veor	d10, d3, d4\n\t"
        "vadd.i64	d1, d12\n\t"
        "vbsl	d8, d7, d0\n\t"
        "vbsl	d10, d2, d4\n\t"
        "vadd.i64	d1, d8\n\t"
        "vadd.i64	d10, d9\n\t"
        "vadd.i64	d5, d1\n\t"
        "vadd.i64	d1, d10\n\t"
        /* Round 7 */
        "vld1.64	{d12}, [r3]!\n\t"
        "vshl.u64	d8, d5, #50\n\t"
        "vsri.u64	d8, d5, #14\n\t"
        "vshl.u64	d9, d1, #36\n\t"
        "vsri.u64	d9, d1, #28\n\t"
        "vshl.u64	d10, d5, #46\n\t"
        "vsri.u64	d10, d5, #18\n\t"
        "vshl.u64	d11, d1, #30\n\t"
        "vsri.u64	d11, d1, #34\n\t"
        "veor	d8, d10\n\t"
        "veor	d9, d11\n\t"
        "vshl.u64	d10, d5, #23\n\t"
        "vsri.u64	d10, d5, #41\n\t"
        "vshl.u64	d11, d1, #25\n\t"
        "vsri.u64	d11, d1, #39\n\t"
        "veor	d8, d10\n\t"
        "veor	d9, d11\n\t"
        "vadd.i64	d0, d8\n\t"
        "vadd.i64	d12, d23\n\t"
        "vmov	d8, d5\n\t"
        "veor	d10, d2, d3\n\t"
        "vadd.i64	d0, d12\n\t"
        "vbsl	d8, d6, d7\n\t"
        "vbsl	d10, d1, d3\n\t"
        "vadd.i64	d0, d8\n\t"
        "vadd.i64	d10, d9\n\t"
        "vadd.i64	d4, d0\n\t"
        "vadd.i64	d0, d10\n\t"
        /* Round 8 */
        "vld1.64	{d12}, [r3]!\n\t"
        "vshl.u64	d8, d4, #50\n\t"
        "vsri.u64	d8, d4, #14\n\t"
        "vshl.u64	d9, d0, #36\n\t"
        "vsri.u64	d9, d0, #28\n\t"
        "vshl.u64	d10, d4, #46\n\t"
        "vsri.u64	d10, d4, #18\n\t"
        "vshl.u64	d11, d0, #30\n\t"
        "vsri.u64	d11, d0, #34\n\t"
        "veor	d8, d10\n\t"
        "veor	d9, d11\n\t"
        "vshl.u64	d10, d4, #23\n\t"
        "vsri.u64	d10, d4, #41\n\t"
        "vshl.u64	d11, d0, #25\n\t"
        "vsri.u64	d11, d0, #39\n\t"
        "veor	d8, d10\n\t"
        "veor	d9, d11\n\t"
        "vadd.i64	d7, d8\n\t"
        "vadd.i64	d12, d24\n\t"
        "vmov	d8, d4\n\t"
        "veor	d10, d1, d2\n\t"
        "vadd.i64	d7, d12\n\t"
        "vbsl	d8, d5, d6\n\t"
        "vbsl	d10, d0, d2\n\t"
        "vadd.i64	d7, d8\n\t"
        "vadd.i64	d10, d9\n\t"
        "vadd.i64	d3, d7\n\t"
        "vadd.i64	d7, d10\n\t"
        /* Round 9 */
        "vld1.64	{d12}, [r3]!\n\t"
        "vshl.u64	d8, d3, #50\n\t"
        "vsri.u64	d8, d3, #14\n\t"
        "vshl.u64	d9, d7, #36\n\t"
        "vsri.u64	d9, d7, #28\n\t"
        "vshl.u64	d10, d3, #46\n\t"
        "vsri.u64	d10, d3, #18\n\t"
        "vshl.u64	d11, d7, #30\n\t"
        "vsri.u64	d11, d7, #34\n\t"
        "veor	d8, d10\n\t"
        "veor	d9, d11\n\t"
        "vshl.u64	d10, d3, #23\n\t"
        "vsri.u64	d10, d3, #41\n\t"
        "vshl.u64	d11, d7, #25\n\t"
        "vsri.u64	d11, d7, #39\n\t"
        "veor	d8, d10\n\t"
        "veor	d9, d11\n\t"
        "vadd.i64	d6, d8\n\t"
        "vadd.i64	d12, d25\n\t"
        "vmov	d8, d3\n\t"
        "veor	d10, d0, d1\n\t"
        "vadd.i64	d6, d12\n\t"
        "vbsl	d8, d4, d5\n\t"
        "vbsl	d10, d7, d1\n\t"
        "vadd.i64	d6, d8\n\t"
        "vadd.i64	d10, d9\n\t"
        "vadd.i64	d2, d6\n\t"
        "vadd.i64	d6, d10\n\t"
        /* Round 10 */
        "vld1.64	{d12}, [r3]!\n\t"
        "vshl.u64	d8, d2, #50\n\t"
        "vsri.u64	d8, d2, #14\n\t"
        "vshl.u64	d9, d6, #36\n\t"
        "vsri.u64	d9, d6, #28\n\t"
        "vshl.u64	d10, d2, #46\n\t"
        "vsri.u64	d10, d2, #18\n\t"
        "vshl.u64	d11, d6, #30\n\t"
        "vsri.u64	d11, d6, #34\n\t"
        "veor	d8, d10\n\t"
        "veor	d9, d11\n\t"
        "vshl.u64	d10, d2, #23\n\t"
        "vsri.u64	d10, d2, #41\n\t"
        "vshl.u64	d11, d6, #25\n\t"
        "vsri.u64	d11, d6, #39\n\t"
        "veor	d8, d10\n\t"
        "veor	d9, d11\n\t"
        "vadd.i64	d5, d8\n\t"
        "vadd.i64	d12, d26\n\t"
        "vmov	d8, d2\n\t"
        "veor	d10, d7, d0\n\t"
        "vadd.i64	d5, d12\n\t"
        "vbsl	d8, d3, d4\n\t"
        "vbsl	d10, d6, d0\n\t"
        "vadd.i64	d5, d8\n\t"
        "vadd.i64	d10, d9\n\t"
        "vadd.i64	d1, d5\n\t"
        "vadd.i64	d5, d10\n\t"
        /* Round 11 */
        "vld1.64	{d12}, [r3]!\n\t"
        "vshl.u64	d8, d1, #50\n\t"
        "vsri.u64	d8, d1, #14\n\t"
        "vshl.u64	d9, d5, #36\n\t"
        "vsri.u64	d9, d5, #28\n\t"
        "vshl.u64	d10, d1, #46\n\t"
        "vsri.u64	d10, d1, #18\n\t"
        "vshl.u64	d11, d5, #30\n\t"
        "vsri.u64	d11, d5, #34\n\t"
        "veor	d8, d10\n\t"
        "veor	d9, d11\n\t"
        "vshl.u64	d10, d1, #23\n\t"
        "vsri.u64	d10, d1, #41\n\t"
        "vshl.u64	d11, d5, #25\n\t"
        "vsri.u64	d11, d5, #39\n\t"
        "veor	d8, d10\n\t"
        "veor	d9, d11\n\t"
        "vadd.i64	d4, d8\n\t"
        "vadd.i64	d12, d27\n\t"
        "vmov	d8, d1\n\t"
        "veor	d10, d6, d7\n\t"
        "vadd.i64	d4, d12\n\t"
        "vbsl	d8, d2, d3\n\t"
        "vbsl	d10, d5, d7\n\t"
        "vadd.i64	d4, d8\n\t"
        "vadd.i64	d10, d9\n\t"
        "vadd.i64	d0, d4\n\t"
        "vadd.i64	d4, d10\n\t"
        /* Round 12 */
        "vld1.64	{d12}, [r3]!\n\t"
        "vshl.u64	d8, d0, #50\n\t"
        "vsri.u64	d8, d0, #14\n\t"
        "vshl.u64	d9, d4, #36\n\t"
        "vsri.u64	d9, d4, #28\n\t"
        "vshl.u64	d10, d0, #46\n\t"
        "vsri.u64	d10, d0, #18\n\t"
        "vshl.u64	d11, d4, #30\n\t"
        "vsri.u64	d11, d4, #34\n\t"
        "veor	d8, d10\n\t"
        "veor	d9, d11\n\t"
        "vshl.u64	d10, d0, #23\n\t"
        "vsri.u64	d10, d0, #41\n\t"
        "vshl.u64	d11, d4, #25\n\t"
        "vsri.u64	d11, d4, #39\n\t"
        "veor	d8, d10\n\t"
        "veor	d9, d11\n\t"
        "vadd.i64	d3, d8\n\t"
        "vadd.i64	d12, d28\n\t"
        "vmov	d8, d0\n\t"
        "veor	d10, d5, d6\n\t"
        "vadd.i64	d3, d12\n\t"
        "vbsl	d8, d1, d2\n\t"
        "vbsl	d10, d4, d6\n\t"
        "vadd.i64	d3, d8\n\t"
        "vadd.i64	d10, d9\n\t"
        "vadd.i64	d7, d3\n\t"
        "vadd.i64	d3, d10\n\t"
        /* Round 13 */
        "vld1.64	{d12}, [r3]!\n\t"
        "vshl.u64	d8, d7, #50\n\t"
        "vsri.u64	d8, d7, #14\n\t"
        "vshl.u64	d9, d3, #36\n\t"
        "vsri.u64	d9, d3, #28\n\t"
        "vshl.u64	d10, d7, #46\n\t"
        "vsri.u64	d10, d7, #18\n\t"
        "vshl.u64	d11, d3, #30\n\t"
        "vsri.u64	d11, d3, #34\n\t"
        "veor	d8, d10\n\t"
        "veor	d9, d11\n\t"
        "vshl.u64	d10, d7, #23\n\t"
        "vsri.u64	d10, d7, #41\n\t"
        "vshl.u64	d11, d3, #25\n\t"
        "vsri.u64	d11, d3, #39\n\t"
        "veor	d8, d10\n\t"
        "veor	d9, d11\n\t"
        "vadd.i64	d2, d8\n\t"
        "vadd.i64	d12, d29\n\t"
        "vmov	d8, d7\n\t"
        "veor	d10, d4, d5\n\t"
        "vadd.i64	d2, d12\n\t"
        "vbsl	d8, d0, d1\n\t"
        "vbsl	d10, d3, d5\n\t"
        "vadd.i64	d2, d8\n\t"
        "vadd.i64	d10, d9\n\t"
        "vadd.i64	d6, d2\n\t"
        "vadd.i64	d2, d10\n\t"
        /* Round 14 */
        "vld1.64	{d12}, [r3]!\n\t"
        "vshl.u64	d8, d6, #50\n\t"
        "vsri.u64	d8, d6, #14\n\t"
        "vshl.u64	d9, d2, #36\n\t"
        "vsri.u64	d9, d2, #28\n\t"
        "vshl.u64	d10, d6, #46\n\t"
        "vsri.u64	d10, d6, #18\n\t"
        "vshl.u64	d11, d2, #30\n\t"
        "vsri.u64	d11, d2, #34\n\t"
        "veor	d8, d10\n\t"
        "veor	d9, d11\n\t"
        "vshl.u64	d10, d6, #23\n\t"
        "vsri.u64	d10, d6, #41\n\t"
        "vshl.u64	d11, d2, #25\n\t"
        "vsri.u64	d11, d2, #39\n\t"
        "veor	d8, d10\n\t"
        "veor	d9, d11\n\t"
        "vadd.i64	d1, d8\n\t"
        "vadd.i64	d12, d30\n\t"
        "vmov	d8, d6\n\t"
        "veor	d10, d3, d4\n\t"
        "vadd.i64	d1, d12\n\t"
        "vbsl	d8, d7, d0\n\t"
        "vbsl	d10, d2, d4\n\t"
        "vadd.i64	d1, d8\n\t"
        "vadd.i64	d10, d9\n\t"
        "vadd.i64	d5, d1\n\t"
        "vadd.i64	d1, d10\n\t"
        /* Round 15 */
        "vld1.64	{d12}, [r3]!\n\t"
        "vshl.u64	d8, d5, #50\n\t"
        "vsri.u64	d8, d5, #14\n\t"
        "vshl.u64	d9, d1, #36\n\t"
        "vsri.u64	d9, d1, #28\n\t"
        "vshl.u64	d10, d5, #46\n\t"
        "vsri.u64	d10, d5, #18\n\t"
        "vshl.u64	d11, d1, #30\n\t"
        "vsri.u64	d11, d1, #34\n\t"
        "veor	d8, d10\n\t"
        "veor	d9, d11\n\t"
        "vshl.u64	d10, d5, #23\n\t"
        "vsri.u64	d10, d5, #41\n\t"
        "vshl.u64	d11, d1, #25\n\t"
        "vsri.u64	d11, d1, #39\n\t"
        "veor	d8, d10\n\t"
        "veor	d9, d11\n\t"
        "vadd.i64	d0, d8\n\t"
        "vadd.i64	d12, d31\n\t"
        "vmov	d8, d5\n\t"
        "veor	d10, d2, d3\n\t"
        "vadd.i64	d0, d12\n\t"
        "vbsl	d8, d6, d7\n\t"
        "vbsl	d10, d1, d3\n\t"
        "vadd.i64	d0, d8\n\t"
        "vadd.i64	d10, d9\n\t"
        "vadd.i64	d4, d0\n\t"
        "vadd.i64	d0, d10\n\t"
        /* Add in digest from start */
        "vldm.64	%[sha512], {d8-d15}\n\t"
        "vadd.i64	q0, q0, q4\n\t"
        "vadd.i64	q1, q1, q5\n\t"
        "vadd.i64	q2, q2, q6\n\t"
        "vadd.i64	q3, q3, q7\n\t"
        "vstm.64	%[sha512], {d0-d7}\n\t"
        "subs	%[len], %[len], #0x80\n\t"
        "bne	L_sha512_len_neon_begin_%=\n\t"
        : [sha512] "+r" (sha512), [data] "+r" (data), [len] "+r" (len)
        : [L_SHA512_transform_len_k] "r" (L_SHA512_transform_len_k), [L_SHA512_transform_neon_len_k] "r" (L_SHA512_transform_neon_len_k)
        : "memory", "r3", "r12", "d0", "d1", "d2", "d3", "d4", "d5", "d6", "d7", "d8", "d9", "d10", "d11", "d12", "d13", "d14", "d15", "q8", "q9", "q10", "q11", "q12", "q13", "q14", "q15"
    );
}

#endif /* !WOLFSSL_ARMASM_NO_NEON */
#endif /* !__aarch64__ */
#endif /* WOLFSSL_ARMASM */
